swc_ecma_regexp/parser/reader/string_literal_parser/
parser_impl.rs1use super::{
2 ast,
3 characters::{
4 is_line_terminator, is_non_escape_character, is_single_escape_character, CR, LF, LS, PS,
5 },
6 diagnostics,
7 options::Options,
8};
9use crate::{diagnostics::Result, parser::span_factory::SpanFactory};
10
/// A code point together with the offsets it was read from, laid out as
/// `((start, end), code_point)`. The offsets are accumulated from
/// `char::len_utf8`, i.e. they count UTF-8 bytes from the start of the parsed text.
type OffsetsAndCp = ((u32, u32), u32);
13
14pub fn parse_regexp_literal(
17 source_text: &str,
18 span_offset: u32,
19 combine_surrogate_pair: bool,
20) -> Vec<ast::CodePoint> {
21 let mut body = vec![];
22
23 let mut offset = 0;
24 for ch in source_text.chars() {
25 let start = offset;
26 #[expect(clippy::cast_possible_truncation)]
27 let end = start + ch.len_utf8() as u32;
28
29 let offsets_and_cp: OffsetsAndCp = ((start, end), ch as u32);
30 Parser::handle_code_point(
31 &mut body,
32 offsets_and_cp,
33 span_offset,
34 combine_surrogate_pair,
35 );
36 offset = end;
37 }
38
39 body
40}
41
42pub struct Parser {
43 chars: Vec<char>,
48 index: usize,
49 offset: u32,
50 options: Options,
51}
52
53impl Parser {
54 pub fn handle_code_point(
56 body: &mut Vec<ast::CodePoint>,
57 (offsets, cp): OffsetsAndCp,
58 span_offset: u32,
59 combine_surrogate_pair: bool,
60 ) {
61 let span = SpanFactory::span_from_u32(span_offset + offsets.0, span_offset + offsets.1);
62
63 if combine_surrogate_pair || (0..=0xffff).contains(&cp) {
64 body.push(ast::CodePoint { span, value: cp });
66 } else {
67 let (lead, trail) = (
69 0xd800 + ((cp - 0x10000) >> 10),
70 0xdc00 + ((cp - 0x10000) & 0x3ff),
71 );
72 body.push(ast::CodePoint { span, value: lead });
73 body.push(ast::CodePoint { span, value: trail });
74 }
75 }
76
77 pub fn new(source_text: &str, options: Options) -> Self {
80 Self {
81 chars: source_text.chars().collect::<Vec<_>>(),
82 index: 0,
83 offset: 0,
84 options,
85 }
86 }
87
88 pub fn parse(mut self) -> Result<ast::StringLiteral> {
94 let (quote_char, kind) = if self.eat('"') {
95 ('"', ast::StringLiteralKind::Double)
96 } else if self.eat('\'') {
97 ('\'', ast::StringLiteralKind::Single)
98 } else {
99 return Err(diagnostics::invalid_input(SpanFactory::span_from_u32(
100 self.options.span_offset,
101 self.options.span_offset,
102 )));
103 };
104
105 let body = self.parse_string_characters(quote_char)?;
106
107 if self.eat(quote_char) {
108 if self.peek().is_some() {
109 return Err(diagnostics::invalid_input(SpanFactory::span_from_u32(
110 self.options.span_offset + self.offset(),
111 self.options.span_offset + self.offset(),
112 )));
113 }
114
115 let span = SpanFactory::span_from_u32(
116 self.options.span_offset,
117 self.options.span_offset + self.offset(),
118 );
119 return Ok(ast::StringLiteral { span, kind, body });
120 }
121
122 Err(diagnostics::invalid_input(SpanFactory::span_from_u32(
123 self.options.span_offset + self.offset(),
124 self.options.span_offset + self.offset(),
125 )))
126 }
127
128 fn parse_string_characters(
138 &mut self,
139 single_or_double_quote: char,
140 ) -> Result<Vec<ast::CodePoint>> {
141 let mut body = vec![];
142 while let Some(code_point) = self.parse_string_character(single_or_double_quote)? {
143 Parser::handle_code_point(
144 &mut body,
145 code_point,
146 self.options.span_offset,
147 self.options.combine_surrogate_pair,
148 );
149 }
150 Ok(body)
151 }
152
153 fn parse_string_character(
169 &mut self,
170 single_or_double_quote: char,
171 ) -> Result<Option<OffsetsAndCp>> {
172 let offset_start = self.offset();
173 let checkpoint = self.checkpoint();
174
175 if let Some(ch) = self
176 .peek()
177 .filter(|&ch| ch != single_or_double_quote && ch != '\\' && !is_line_terminator(ch))
178 {
179 self.advance();
180 return Ok(Some(((offset_start, self.offset()), ch as u32)));
181 }
182 if self.peek() == Some(LS) {
183 self.advance();
184 return Ok(Some(((offset_start, self.offset()), LS as u32)));
185 }
186 if self.peek() == Some(PS) {
187 self.advance();
188 return Ok(Some(((offset_start, self.offset()), PS as u32)));
189 }
190 if self.eat('\\') {
191 if let Some(cp) = self.parse_escape_sequence(offset_start)? {
192 return Ok(Some(((offset_start, self.offset()), cp)));
193 }
194 self.rewind(checkpoint);
195 }
196 if let Some(cp) = self.parse_line_terminator_sequence() {
197 return Ok(Some(((offset_start, self.offset()), cp)));
198 }
199
200 Ok(None)
201 }
202
203 fn parse_escape_sequence(&mut self, offset_start: u32) -> Result<Option<u32>> {
213 if let Some(cp) = self.parse_character_escape_sequence() {
214 return Ok(Some(cp));
215 }
216 if self.peek() == Some('0') && self.peek2().map_or(true, |ch| !ch.is_ascii_digit()) {
217 self.advance();
218 return Ok(Some(0x00));
219 }
220 if let Some(cp) = self.parse_legacy_octal_escape_sequence() {
221 if self.options.strict_mode {
224 return Err(diagnostics::legacy_in_strict_mode(
225 "octal escape sequence",
226 SpanFactory::span_from_u32(
227 self.options.span_offset + offset_start,
228 self.options.span_offset + self.offset(),
229 ),
230 ));
231 }
232 return Ok(Some(cp));
233 }
234 if let Some(cp) = self.parse_non_octal_decimal_escape_sequence() {
235 if self.options.strict_mode {
238 return Err(diagnostics::legacy_in_strict_mode(
239 "non octal decimal escape sequence",
240 SpanFactory::span_from_u32(
241 self.options.span_offset + offset_start,
242 self.options.span_offset + self.offset(),
243 ),
244 ));
245 }
246 return Ok(Some(cp));
247 }
248 if let Some(cp) = self.parse_hex_escape_sequence() {
249 return Ok(Some(cp));
250 }
251 if let Some(cp) = self.parse_unicode_escape_sequence(offset_start)? {
252 return Ok(Some(cp));
253 }
254
255 Ok(None)
256 }
257
258 fn parse_character_escape_sequence(&mut self) -> Option<u32> {
264 if let Some(ch) = self.peek().filter(|&ch| is_single_escape_character(ch)) {
265 self.advance();
266 return Some(ch as u32);
267 }
268 if let Some(ch) = self.peek().filter(|&ch| is_non_escape_character(ch)) {
269 self.advance();
270 return Some(ch as u32);
271 }
272
273 None
274 }
275
276 fn parse_legacy_octal_escape_sequence(&mut self) -> Option<u32> {
294 if let Some(first) = self.consume_octal_digit() {
295 if first == 0 && self.peek().filter(|&ch| !matches!(ch, '8' | '9')).is_some() {
297 return Some(first);
298 }
299
300 if let Some(second) = self.consume_octal_digit() {
301 if let Some(third) = self.consume_octal_digit() {
302 if first <= 3 {
304 return Some(first * 64 + second * 8 + third);
305 }
306 }
307
308 return Some(first * 8 + second);
311 }
312
313 return Some(first);
315 }
316
317 None
318 }
319
320 fn parse_non_octal_decimal_escape_sequence(&mut self) -> Option<u32> {
325 if self.eat('8') {
326 return Some('8' as u32);
327 }
328 if self.eat('9') {
329 return Some('9' as u32);
330 }
331 None
332 }
333
334 fn parse_hex_escape_sequence(&mut self) -> Option<u32> {
339 let checkpoint = self.checkpoint();
340
341 if self.eat('x') {
342 if let Some(first) = self.consume_hex_digit() {
343 if let Some(second) = self.consume_hex_digit() {
344 return Some(first * 16 + second);
345 }
346 }
347
348 self.rewind(checkpoint);
349 }
350
351 None
352 }
353
354 fn parse_unicode_escape_sequence(&mut self, offset_start: u32) -> Result<Option<u32>> {
360 let chckpoint = self.checkpoint();
361
362 if self.eat('u') {
363 if let Some(cp) = self.consume_hex4_digits() {
364 return Ok(Some(cp));
365 }
366 self.rewind(chckpoint);
367 }
368
369 if self.eat('u') {
370 if self.eat('{') {
371 if let Some(hex_digits) = self
372 .consume_hex_digits(offset_start)?
373 .filter(|&cp| cp <= 0x10_ffff)
374 {
375 if self.eat('}') {
376 return Ok(Some(hex_digits));
377 }
378 }
379 }
380 self.rewind(chckpoint);
381 }
382
383 Ok(None)
384 }
385
386 fn parse_line_terminator_sequence(&mut self) -> Option<u32> {
398 let checkpoint = self.checkpoint();
399
400 if self.eat('\\') {
401 if self.peek() == Some(LF) {
402 self.advance();
403 return Some(LF as u32);
404 }
405 if self.peek() == Some(CR) && self.peek2() != Some(LF) {
406 self.advance();
407 return Some(CR as u32);
408 }
409 if self.peek() == Some(LS) {
410 self.advance();
411 return Some(LS as u32);
412 }
413 if self.peek() == Some(PS) {
414 self.advance();
415 return Some(PS as u32);
416 }
417 if self.peek() == Some(CR) && self.peek2() == Some(LF) {
421 self.advance();
422 self.advance();
423 return Some(LF as u32);
424 }
425 }
426
427 self.rewind(checkpoint);
428 None
429 }
430
431 fn consume_hex_digit(&mut self) -> Option<u32> {
434 if let Some(ch) = self.peek().filter(char::is_ascii_hexdigit) {
435 self.advance();
436 return ch.to_digit(16);
437 }
438
439 None
440 }
441
442 fn consume_octal_digit(&mut self) -> Option<u32> {
443 if let Some(ch) = self
444 .peek()
445 .filter(char::is_ascii_digit)
446 .filter(|&ch| ch < '8')
447 {
448 self.advance();
449 return Some(ch as u32 - '0' as u32);
451 }
452
453 None
454 }
455
456 fn consume_hex4_digits(&mut self) -> Option<u32> {
461 let checkpoint = self.checkpoint();
462
463 let mut value = 0;
464 for _ in 0..4 {
465 let Some(hex) = self
466 .peek()
467 .filter(char::is_ascii_hexdigit)
468 .and_then(|ch| ch.to_digit(16))
469 else {
470 self.rewind(checkpoint);
471 return None;
472 };
473
474 value = (16 * value) + hex;
475 self.advance();
476 }
477
478 Some(value)
479 }
480
481 fn consume_hex_digits(&mut self, offset_start: u32) -> Result<Option<u32>> {
482 let checkpoint = self.checkpoint();
483
484 let mut value: u32 = 0;
485 while let Some(hex) = self
486 .peek()
487 .filter(char::is_ascii_hexdigit)
488 .and_then(|ch| ch.to_digit(16))
489 {
490 if let Some(v) = value.checked_mul(16).and_then(|v| v.checked_add(hex)) {
492 value = v;
493 self.advance();
494 } else {
495 return Err(diagnostics::too_large_unicode_escape_sequence(
496 SpanFactory::span_from_u32(
497 self.options.span_offset + offset_start,
498 self.options.span_offset + self.offset(),
499 ),
500 ));
501 }
502 }
503
504 if self.checkpoint() != checkpoint {
505 return Ok(Some(value));
506 }
507
508 Ok(None)
509 }
510
511 fn checkpoint(&self) -> (usize, u32) {
514 (self.index, self.offset)
515 }
516
517 fn rewind(&mut self, checkpoint: (usize, u32)) {
518 self.index = checkpoint.0;
519 self.offset = checkpoint.1;
520 }
521
522 fn advance(&mut self) {
523 if let Some(ch) = self.chars.get(self.index) {
524 #[expect(clippy::cast_possible_truncation)]
525 let len = ch.len_utf8() as u32;
526 self.offset += len;
527 self.index += 1;
528 }
529 }
530
531 fn eat(&mut self, ch: char) -> bool {
532 if self.peek() == Some(ch) {
533 self.advance();
534 return true;
535 }
536 false
537 }
538
539 fn offset(&self) -> u32 {
540 self.offset
541 }
542
543 fn peek_nth(&self, n: usize) -> Option<char> {
544 let nth = self.index + n;
545 self.chars.get(nth).copied()
546 }
547
548 fn peek(&self) -> Option<char> {
549 self.peek_nth(0)
550 }
551
552 fn peek2(&self) -> Option<char> {
553 self.peek_nth(1)
554 }
555}