1use std::vec;
2
3use diagnostics::Result;
4use swc_atoms::{atom, Atom as SpanAtom};
5
6use crate::{
7 ast, diagnostics,
8 parser::{
9 pattern_parser::{character, state::State, unicode_property},
10 reader::Reader,
11 span_factory::SpanFactory,
12 },
13 surrogate_pair,
14};
15
16pub struct PatternParser<'a> {
17 reader: Reader<'a>,
18 state: State,
19 span_factory: SpanFactory,
20}
21
22impl<'a> PatternParser<'a> {
23 pub fn new(
24 reader: Reader<'a>,
25 (unicode_mode, unicode_sets_mode): (bool, bool),
26 span_offset: u32,
27 ) -> Self {
28 Self {
29 reader,
30 state: State::new(unicode_mode, unicode_sets_mode),
31 span_factory: SpanFactory::new(span_offset),
32 }
33 }
34
35 pub fn parse(mut self) -> Result<ast::Pattern> {
36 let checkpoint = self.reader.checkpoint();
49
50 self.state
54 .initialize_with_parsing(&mut self.reader)
55 .map_err(|offsets| {
56 diagnostics::duplicated_capturing_group_names(
57 offsets
58 .iter()
59 .map(|&(start, end)| self.span_factory.create(start, end))
60 .collect(),
61 )
62 })?;
63 self.reader.rewind(checkpoint);
64
65 if u32::MAX == self.state.num_of_capturing_groups {
71 return Err(diagnostics::too_may_capturing_groups(
72 self.span_factory.create(0, 0),
73 ));
74 }
75
76 let span_start = self.reader.offset();
78 let disjunction = self.parse_disjunction()?;
79
80 if self.reader.peek().is_some() {
81 let span_start = self.reader.offset();
82 return Err(diagnostics::parse_pattern_incomplete(
83 self.span_factory.create(span_start, self.reader.offset()),
84 ));
85 }
86
87 Ok(ast::Pattern {
88 span: self.span_factory.create(span_start, self.reader.offset()),
89 body: disjunction,
90 })
91 }
92
93 fn parse_disjunction(&mut self) -> Result<ast::Disjunction> {
99 let span_start = self.reader.offset();
100
101 let mut body = vec![];
102 loop {
103 body.push(self.parse_alternative()?);
104
105 if !self.reader.eat('|') {
106 break;
107 }
108 }
109
110 Ok(ast::Disjunction {
111 span: self.span_factory.create(span_start, self.reader.offset()),
112 body,
113 })
114 }
115
116 fn parse_alternative(&mut self) -> Result<ast::Alternative> {
122 let span_start = self.reader.offset();
123
124 let mut body = vec![];
125 while let Some(term) = self.parse_term()? {
126 body.push(term);
127 }
128
129 Ok(ast::Alternative {
130 span: self.span_factory.create(span_start, self.reader.offset()),
131 body,
132 })
133 }
134
135 fn parse_term(&mut self) -> Result<Option<ast::Term>> {
147 if self.state.unicode_mode {
151 if let Some(assertion) = self.parse_assertion()? {
152 return Ok(Some(assertion));
153 }
154
155 let span_start = self.reader.offset();
156 return match (self.parse_atom()?, self.consume_quantifier()?) {
157 (Some(atom), Some(((min, max), greedy))) => {
158 Ok(Some(ast::Term::Quantifier(Box::new(ast::Quantifier {
159 span: self.span_factory.create(span_start, self.reader.offset()),
160 greedy,
161 min,
162 max,
163 body: atom,
164 }))))
165 }
166 (Some(atom), None) => Ok(Some(atom)),
167 (None, Some(_)) => Err(diagnostics::lone_quantifier(
168 self.span_factory.create(span_start, self.reader.offset()),
169 "Atom",
170 )),
171 (None, None) => Ok(None),
172 };
173 }
174
175 let span_start = self.reader.offset();
180 if let Some(assertion) = self.parse_assertion()? {
181 if let ast::Term::LookAroundAssertion(look_around) = &assertion {
183 if matches!(
184 look_around.kind,
185 ast::LookAroundAssertionKind::Lookahead
186 | ast::LookAroundAssertionKind::NegativeLookahead
187 ) {
188 if let Some(((min, max), greedy)) = self.consume_quantifier()? {
189 return Ok(Some(ast::Term::Quantifier(Box::new(ast::Quantifier {
190 span: self.span_factory.create(span_start, self.reader.offset()),
191 greedy,
192 min,
193 max,
194 body: assertion,
195 }))));
196 }
197 }
198 }
199
200 return Ok(Some(assertion));
201 }
202
203 match (self.parse_extended_atom()?, self.consume_quantifier()?) {
204 (Some(extended_atom), Some(((min, max), greedy))) => {
205 Ok(Some(ast::Term::Quantifier(Box::new(ast::Quantifier {
206 span: self.span_factory.create(span_start, self.reader.offset()),
207 min,
208 max,
209 greedy,
210 body: extended_atom,
211 }))))
212 }
213 (Some(extended_atom), None) => Ok(Some(extended_atom)),
214 (None, Some(_)) => Err(diagnostics::lone_quantifier(
215 self.span_factory.create(span_start, self.reader.offset()),
216 "ExtendedAtom",
217 )),
218 (None, None) => Ok(None),
219 }
220 }
221
222 fn parse_assertion(&mut self) -> Result<Option<ast::Term>> {
240 let span_start = self.reader.offset();
241
242 let kind = if self.reader.eat('^') {
243 Some(ast::BoundaryAssertionKind::Start)
244 } else if self.reader.eat('$') {
245 Some(ast::BoundaryAssertionKind::End)
246 } else if self.reader.eat2('\\', 'b') {
247 Some(ast::BoundaryAssertionKind::Boundary)
248 } else if self.reader.eat2('\\', 'B') {
249 Some(ast::BoundaryAssertionKind::NegativeBoundary)
250 } else {
251 None
252 };
253
254 if let Some(kind) = kind {
255 return Ok(Some(ast::Term::BoundaryAssertion(Box::new(
256 ast::BoundaryAssertion {
257 span: self.span_factory.create(span_start, self.reader.offset()),
258 kind,
259 },
260 ))));
261 }
262
263 let kind = if self.reader.eat3('(', '?', '=') {
264 Some(ast::LookAroundAssertionKind::Lookahead)
265 } else if self.reader.eat3('(', '?', '!') {
266 Some(ast::LookAroundAssertionKind::NegativeLookahead)
267 } else if self.reader.eat4('(', '?', '<', '=') {
268 Some(ast::LookAroundAssertionKind::Lookbehind)
269 } else if self.reader.eat4('(', '?', '<', '!') {
270 Some(ast::LookAroundAssertionKind::NegativeLookbehind)
271 } else {
272 None
273 };
274
275 if let Some(kind) = kind {
276 let disjunction = self.parse_disjunction()?;
277
278 if !self.reader.eat(')') {
279 return Err(diagnostics::unterminated_pattern(
280 self.span_factory.create(span_start, self.reader.offset()),
281 "lookaround assertion",
282 ));
283 }
284
285 return Ok(Some(ast::Term::LookAroundAssertion(Box::new(
286 ast::LookAroundAssertion {
287 span: self.span_factory.create(span_start, self.reader.offset()),
288 kind,
289 body: disjunction,
290 },
291 ))));
292 }
293
294 Ok(None)
295 }
296
297 fn parse_atom(&mut self) -> Result<Option<ast::Term>> {
308 let span_start = self.reader.offset();
309
310 if let Some(cp) = self
312 .reader
313 .peek()
314 .filter(|&cp| !character::is_syntax_character(cp))
315 {
316 self.reader.advance();
317
318 return Ok(Some(ast::Term::Character(Box::new(ast::Character {
319 span: self.span_factory.create(span_start, self.reader.offset()),
320 kind: ast::CharacterKind::Symbol,
321 value: cp,
322 }))));
323 }
324
325 if self.reader.eat('.') {
327 return Ok(Some(ast::Term::Dot(ast::Dot {
328 span: self.span_factory.create(span_start, self.reader.offset()),
329 })));
330 }
331
332 if self.reader.eat('\\') {
334 if let Some(atom_escape) = self.parse_atom_escape(span_start)? {
335 return Ok(Some(atom_escape));
336 }
337 }
338
339 if let Some(character_class) = self.parse_character_class()? {
341 return Ok(Some(ast::Term::CharacterClass(Box::new(character_class))));
342 }
343
344 if let Some(capturing_group) = self.parse_capturing_group()? {
348 return Ok(Some(ast::Term::CapturingGroup(Box::new(capturing_group))));
349 }
350
351 if let Some(ignore_group) = self.parse_ignore_group()? {
357 return Ok(Some(ast::Term::IgnoreGroup(Box::new(ignore_group))));
358 }
359
360 Ok(None)
361 }
362
363 fn parse_extended_atom(&mut self) -> Result<Option<ast::Term>> {
376 let span_start = self.reader.offset();
377
378 if self.reader.eat('.') {
380 return Ok(Some(ast::Term::Dot(ast::Dot {
381 span: self.span_factory.create(span_start, self.reader.offset()),
382 })));
383 }
384
385 if self.reader.eat('\\') {
386 if let Some(atom_escape) = self.parse_atom_escape(span_start)? {
388 return Ok(Some(atom_escape));
389 }
390
391 if self.reader.peek().filter(|&cp| cp == 'c' as u32).is_some() {
393 return Ok(Some(ast::Term::Character(Box::new(ast::Character {
394 span: self.span_factory.create(span_start, self.reader.offset()),
395 kind: ast::CharacterKind::Symbol,
396 value: '\\' as u32,
397 }))));
398 }
399
400 return Err(diagnostics::invalid_extended_atom_escape(
401 self.span_factory.create(span_start, self.reader.offset()),
402 ));
403 }
404
405 if let Some(character_class) = self.parse_character_class()? {
407 return Ok(Some(ast::Term::CharacterClass(Box::new(character_class))));
408 }
409
410 if let Some(capturing_group) = self.parse_capturing_group()? {
414 return Ok(Some(ast::Term::CapturingGroup(Box::new(capturing_group))));
415 }
416
417 if let Some(ignore_group) = self.parse_ignore_group()? {
423 return Ok(Some(ast::Term::IgnoreGroup(Box::new(ignore_group))));
424 }
425
426 let span_start = self.reader.offset();
428 if self.consume_quantifier()?.is_some() {
429 return Err(diagnostics::invalid_braced_quantifier(
433 self.span_factory.create(span_start, self.reader.offset()),
434 ));
435 }
436
437 if let Some(cp) = self.consume_extended_pattern_character() {
439 return Ok(Some(ast::Term::Character(Box::new(ast::Character {
440 span: self.span_factory.create(span_start, self.reader.offset()),
441 kind: ast::CharacterKind::Symbol,
442 value: cp,
443 }))));
444 }
445
446 Ok(None)
447 }
448
449 fn parse_atom_escape(&mut self, span_start: u32) -> Result<Option<ast::Term>> {
459 let checkpoint = self.reader.checkpoint();
460
461 if let Some(index) = self.consume_decimal_escape()? {
463 if self.state.unicode_mode {
464 if self.state.num_of_capturing_groups < index {
469 return Err(diagnostics::invalid_indexed_reference(
470 self.span_factory.create(span_start, self.reader.offset()),
471 ));
472 }
473
474 return Ok(Some(ast::Term::IndexedReference(Box::new(
475 ast::IndexedReference {
476 span: self.span_factory.create(span_start, self.reader.offset()),
477 index,
478 },
479 ))));
480 }
481
482 if index <= self.state.num_of_capturing_groups {
483 return Ok(Some(ast::Term::IndexedReference(Box::new(
484 ast::IndexedReference {
485 span: self.span_factory.create(span_start, self.reader.offset()),
486 index,
487 },
488 ))));
489 }
490
491 self.reader.rewind(checkpoint);
492 }
493
494 if let Some(character_class_escape) = self.parse_character_class_escape(span_start) {
496 return Ok(Some(ast::Term::CharacterClassEscape(Box::new(
497 character_class_escape,
498 ))));
499 }
500 if let Some(unicode_property_escape) =
501 self.parse_character_class_escape_unicode(span_start)?
502 {
503 return Ok(Some(ast::Term::UnicodePropertyEscape(Box::new(
504 unicode_property_escape,
505 ))));
506 }
507
508 if let Some(character_escape) = self.parse_character_escape(span_start)? {
510 return Ok(Some(ast::Term::Character(Box::new(character_escape))));
511 }
512
513 if self.state.named_capture_groups && self.reader.eat('k') {
515 if let Some(name) = self.consume_group_name()? {
516 if !self.state.capturing_group_names.contains(&name) {
519 return Err(diagnostics::empty_group_specifier(
520 self.span_factory.create(span_start, self.reader.offset()),
521 ));
522 }
523
524 return Ok(Some(ast::Term::NamedReference(Box::new(
525 ast::NamedReference {
526 span: self.span_factory.create(span_start, self.reader.offset()),
527 name,
528 },
529 ))));
530 }
531
532 return Err(diagnostics::invalid_named_reference(
533 self.span_factory.create(span_start, self.reader.offset()),
534 ));
535 }
536
537 Ok(None)
538 }
539
540 fn parse_character_class_escape(
550 &mut self,
551 span_start: u32,
552 ) -> Option<ast::CharacterClassEscape> {
553 let kind = if self.reader.eat('d') {
554 ast::CharacterClassEscapeKind::D
555 } else if self.reader.eat('D') {
556 ast::CharacterClassEscapeKind::NegativeD
557 } else if self.reader.eat('s') {
558 ast::CharacterClassEscapeKind::S
559 } else if self.reader.eat('S') {
560 ast::CharacterClassEscapeKind::NegativeS
561 } else if self.reader.eat('w') {
562 ast::CharacterClassEscapeKind::W
563 } else if self.reader.eat('W') {
564 ast::CharacterClassEscapeKind::NegativeW
565 } else {
566 return None;
567 };
568
569 Some(ast::CharacterClassEscape {
570 span: self.span_factory.create(span_start, self.reader.offset()),
571 kind,
572 })
573 }
574
575 fn parse_character_class_escape_unicode(
581 &mut self,
582 span_start: u32,
583 ) -> Result<Option<ast::UnicodePropertyEscape>> {
584 if !self.state.unicode_mode {
585 return Ok(None);
586 }
587
588 let negative = if self.reader.eat('p') {
589 false
590 } else if self.reader.eat('P') {
591 true
592 } else {
593 return Ok(None);
594 };
595
596 if self.reader.eat('{') {
597 if let Some((name, value, strings)) =
598 self.consume_unicode_property_value_expression()?
599 {
600 if self.reader.eat('}') {
601 if negative && strings {
609 return Err(diagnostics::invalid_unicode_property_name_negative_strings(
610 self.span_factory.create(span_start, self.reader.offset()),
611 name.as_str(),
612 ));
613 }
614
615 return Ok(Some(ast::UnicodePropertyEscape {
616 span: self.span_factory.create(span_start, self.reader.offset()),
617 negative,
618 strings,
619 name,
620 value,
621 }));
622 }
623 }
624 }
625
626 Err(diagnostics::unterminated_pattern(
627 self.span_factory.create(span_start, self.reader.offset()),
628 "unicode property escape",
629 ))
630 }
631
632 fn parse_character_escape(&mut self, span_start: u32) -> Result<Option<ast::Character>> {
644 if let Some(cp) = self.reader.peek().and_then(character::map_control_escape) {
646 self.reader.advance();
647
648 return Ok(Some(ast::Character {
649 span: self.span_factory.create(span_start, self.reader.offset()),
650 kind: ast::CharacterKind::SingleEscape,
651 value: cp,
652 }));
653 }
654
655 let checkpoint = self.reader.checkpoint();
657 if self.reader.eat('c') {
658 if let Some(cp) = self.reader.peek().and_then(character::map_c_ascii_letter) {
659 self.reader.advance();
660
661 return Ok(Some(ast::Character {
662 span: self.span_factory.create(span_start, self.reader.offset()),
663 kind: ast::CharacterKind::ControlLetter,
664 value: cp,
665 }));
666 }
667 self.reader.rewind(checkpoint);
668 }
669
670 if self.reader.peek().filter(|&cp| cp == '0' as u32).is_some()
672 && self
673 .reader
674 .peek2()
675 .filter(|&cp| character::is_decimal_digit(cp))
676 .is_none()
677 {
678 self.reader.advance();
679
680 return Ok(Some(ast::Character {
681 span: self.span_factory.create(span_start, self.reader.offset()),
682 kind: ast::CharacterKind::Null,
683 value: 0x00,
684 }));
685 }
686
687 if self.reader.eat('x') {
689 if let Some(cp) = self.consume_fixed_hex_digits(2) {
690 return Ok(Some(ast::Character {
691 span: self.span_factory.create(span_start, self.reader.offset()),
692 kind: ast::CharacterKind::HexadecimalEscape,
693 value: cp,
694 }));
695 }
696 self.reader.rewind(checkpoint);
697 }
698
699 if let Some(cp) = self.consume_reg_exp_unicode_escape_sequence(self.state.unicode_mode)? {
701 return Ok(Some(ast::Character {
702 span: self.span_factory.create(span_start, self.reader.offset()),
703 kind: ast::CharacterKind::UnicodeEscape,
704 value: cp,
705 }));
706 }
707
708 if !self.state.unicode_mode {
710 if let Some(cp) = self.consume_legacy_octal_escape_sequence() {
711 let span = self.span_factory.create(span_start, self.reader.offset());
712 let digits = span.hi.0 - span.lo.0 - 1; return Ok(Some(ast::Character {
717 span,
718 kind: (match digits {
719 3 => ast::CharacterKind::Octal3,
720 2 => ast::CharacterKind::Octal2,
721 _ => ast::CharacterKind::Octal1,
722 }),
723 value: cp,
724 }));
725 }
726 }
727
728 if let Some(cp) = self.consume_identity_escape() {
730 return Ok(Some(ast::Character {
731 span: self.span_factory.create(span_start, self.reader.offset()),
732 kind: ast::CharacterKind::Identifier,
733 value: cp,
734 }));
735 }
736
737 Ok(None)
738 }
739
740 fn parse_character_class(&mut self) -> Result<Option<ast::CharacterClass>> {
746 let span_start = self.reader.offset();
747
748 if self.reader.eat('[') {
749 let negative = self.reader.eat('^');
750 let (kind, body) = self.parse_class_contents()?;
751
752 if self.reader.eat(']') {
753 let strings = PatternParser::may_contain_strings_in_class_contents(kind, &body);
754
755 if negative && strings {
758 return Err(diagnostics::invalid_character_class(
759 self.span_factory.create(span_start, self.reader.offset()),
760 ));
761 }
762
763 return Ok(Some(ast::CharacterClass {
764 span: self.span_factory.create(span_start, self.reader.offset()),
765 negative,
766 kind,
767 strings,
768 body,
769 }));
770 }
771
772 return Err(diagnostics::unterminated_pattern(
773 self.span_factory.create(span_start, self.reader.offset()),
774 "character class",
775 ));
776 }
777
778 Ok(None)
779 }
780
781 fn parse_class_contents(
788 &mut self,
789 ) -> Result<(
790 ast::CharacterClassContentsKind,
791 Vec<ast::CharacterClassContents>,
792 )> {
793 if self.reader.peek().filter(|&cp| cp == ']' as u32).is_some()
795 || self.reader.peek().is_none()
797 {
798 return Ok((ast::CharacterClassContentsKind::Union, vec![]));
799 }
800
801 if self.state.unicode_sets_mode {
803 return self.parse_class_set_expression();
804 }
805
806 self.parse_nonempty_class_ranges()
808 }
809
810 fn parse_nonempty_class_ranges(
822 &mut self,
823 ) -> Result<(
824 ast::CharacterClassContentsKind,
825 Vec<ast::CharacterClassContents>,
826 )> {
827 let mut body = vec![];
828
829 loop {
830 let range_span_start = self.reader.offset();
831
832 let Some(class_atom) = self.parse_class_atom()? else {
833 break;
834 };
835
836 let span_start = self.reader.offset();
837 if !self.reader.eat('-') {
838 body.push(class_atom);
840 continue;
841 }
842
843 let dash = ast::CharacterClassContents::Character(Box::new(ast::Character {
844 span: self.span_factory.create(span_start, self.reader.offset()),
845 kind: ast::CharacterKind::Symbol,
846 value: '-' as u32,
847 }));
848
849 let Some(class_atom_to) = self.parse_class_atom()? else {
850 body.push(class_atom);
854 body.push(dash);
855 continue;
856 };
857
858 if let (
861 ast::CharacterClassContents::Character(from),
862 ast::CharacterClassContents::Character(to),
863 ) = (&class_atom, &class_atom_to)
864 {
865 if to.value < from.value {
872 return Err(diagnostics::character_class_range_out_of_order(
873 self.span_factory.create(span_start, self.reader.offset()),
874 "class atom",
875 ));
876 }
877
878 body.push(ast::CharacterClassContents::CharacterClassRange(Box::new(
879 ast::CharacterClassRange {
880 span: from.span.with_hi(to.span.hi),
881 min: *from.clone(),
882 max: *to.clone(),
883 },
884 )));
885 continue;
886 }
887
888 if self.state.unicode_mode {
896 return Err(diagnostics::character_class_range_invalid_atom(
897 self.span_factory
898 .create(range_span_start, self.reader.offset()),
899 ));
900 }
901
902 body.push(class_atom);
903 body.push(dash);
904 body.push(class_atom_to);
905 }
906
907 debug_assert!(!body.is_empty());
909
910 Ok((ast::CharacterClassContentsKind::Union, body))
911 }
912
913 fn parse_class_atom(&mut self) -> Result<Option<ast::CharacterClassContents>> {
919 let span_start = self.reader.offset();
920
921 if self.reader.eat('-') {
922 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
923 ast::Character {
924 span: self.span_factory.create(span_start, self.reader.offset()),
925 kind: ast::CharacterKind::Symbol,
926 value: '-' as u32,
927 },
928 ))));
929 }
930
931 self.parse_class_atom_no_dash()
932 }
933
934 fn parse_class_atom_no_dash(&mut self) -> Result<Option<ast::CharacterClassContents>> {
942 let span_start = self.reader.offset();
943
944 if let Some(cp) = self
945 .reader
946 .peek()
947 .filter(|&cp| cp != '\\' as u32 && cp != ']' as u32 && cp != '-' as u32)
948 {
949 self.reader.advance();
950
951 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
952 ast::Character {
953 span: self.span_factory.create(span_start, self.reader.offset()),
954 kind: ast::CharacterKind::Symbol,
955 value: cp,
956 },
957 ))));
958 }
959
960 if self.reader.eat('\\') {
961 if self.reader.peek().filter(|&cp| cp == 'c' as u32).is_some() {
962 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
963 ast::Character {
964 span: self.span_factory.create(span_start, self.reader.offset()),
965 kind: ast::CharacterKind::Symbol,
966 value: '\\' as u32,
967 },
968 ))));
969 }
970
971 if let Some(class_escape) = self.parse_class_escape(span_start)? {
972 return Ok(Some(class_escape));
973 }
974
975 return Err(diagnostics::invalid_class_atom(
976 self.span_factory.create(span_start, self.reader.offset()),
977 ));
978 }
979
980 Ok(None)
981 }
982
983 fn parse_class_escape(
997 &mut self,
998 span_start: u32,
999 ) -> Result<Option<ast::CharacterClassContents>> {
1000 if self.reader.eat('b') {
1002 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
1003 ast::Character {
1004 span: self.span_factory.create(span_start, self.reader.offset()),
1005 kind: ast::CharacterKind::SingleEscape,
1006 value: 0x08,
1007 },
1008 ))));
1009 }
1010
1011 if self.state.unicode_mode && self.reader.eat('-') {
1013 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
1014 ast::Character {
1015 span: self.span_factory.create(span_start, self.reader.offset()),
1016 kind: ast::CharacterKind::SingleEscape,
1017 value: '-' as u32,
1018 },
1019 ))));
1020 }
1021
1022 if !self.state.unicode_mode {
1024 let checkpoint = self.reader.checkpoint();
1025
1026 if self.reader.eat('c') {
1027 if let Some(cp) = self
1028 .reader
1029 .peek()
1030 .filter(|&cp| character::is_decimal_digit(cp) || cp == '-' as u32)
1031 {
1032 self.reader.advance();
1033
1034 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
1035 ast::Character {
1036 span: self.span_factory.create(span_start, self.reader.offset()),
1037 kind: ast::CharacterKind::ControlLetter,
1038 value: cp,
1039 },
1040 ))));
1041 }
1042
1043 self.reader.rewind(checkpoint);
1044 }
1045 }
1046
1047 if let Some(character_class_escape) = self.parse_character_class_escape(span_start) {
1049 return Ok(Some(ast::CharacterClassContents::CharacterClassEscape(
1050 Box::new(character_class_escape),
1051 )));
1052 }
1053 if let Some(unicode_property_escape) =
1054 self.parse_character_class_escape_unicode(span_start)?
1055 {
1056 return Ok(Some(ast::CharacterClassContents::UnicodePropertyEscape(
1057 Box::new(unicode_property_escape),
1058 )));
1059 }
1060
1061 if let Some(character_escape) = self.parse_character_escape(span_start)? {
1063 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
1064 character_escape,
1065 ))));
1066 }
1067
1068 Ok(None)
1069 }
1070
1071 fn parse_class_set_expression(
1078 &mut self,
1079 ) -> Result<(
1080 ast::CharacterClassContentsKind,
1081 Vec<ast::CharacterClassContents>,
1082 )> {
1083 if let Some(class_set_range) = self.parse_class_set_range()? {
1085 return self.parse_class_set_union(class_set_range);
1086 }
1087
1088 if let Some(class_set_operand) = self.parse_class_set_operand()? {
1089 if self.reader.peek().filter(|&cp| cp == '&' as u32).is_some()
1091 && self.reader.peek2().filter(|&cp| cp == '&' as u32).is_some()
1092 {
1093 return self.parse_class_set_intersection(class_set_operand);
1094 }
1095 if self.reader.peek().filter(|&cp| cp == '-' as u32).is_some()
1097 && self.reader.peek2().filter(|&cp| cp == '-' as u32).is_some()
1098 {
1099 return self.parse_class_set_subtraction(class_set_operand);
1100 }
1101
1102 return self.parse_class_set_union(class_set_operand);
1104 }
1105
1106 let span_start = self.reader.offset();
1107 Err(diagnostics::empty_class_set_expression(
1108 self.span_factory.create(span_start, self.reader.offset()),
1109 ))
1110 }
1111
1112 fn parse_class_set_union(
1118 &mut self,
1119 class_set_range_or_class_set_operand: ast::CharacterClassContents,
1120 ) -> Result<(
1121 ast::CharacterClassContentsKind,
1122 Vec<ast::CharacterClassContents>,
1123 )> {
1124 let mut body = vec![];
1125 body.push(class_set_range_or_class_set_operand);
1126
1127 loop {
1128 if let Some(class_set_range) = self.parse_class_set_range()? {
1129 body.push(class_set_range);
1130 continue;
1131 }
1132 if let Some(class_set_operand) = self.parse_class_set_operand()? {
1133 body.push(class_set_operand);
1134 continue;
1135 }
1136
1137 break;
1138 }
1139
1140 Ok((ast::CharacterClassContentsKind::Union, body))
1141 }
1142
1143 fn parse_class_set_intersection(
1149 &mut self,
1150 class_set_operand: ast::CharacterClassContents,
1151 ) -> Result<(
1152 ast::CharacterClassContentsKind,
1153 Vec<ast::CharacterClassContents>,
1154 )> {
1155 let mut body = vec![];
1156 body.push(class_set_operand);
1157
1158 loop {
1159 if self.reader.peek().filter(|&cp| cp == ']' as u32).is_some() {
1160 break;
1161 }
1162
1163 if self.reader.eat2('&', '&') {
1164 let span_start = self.reader.offset();
1165 if self.reader.eat('&') {
1166 return Err(diagnostics::class_intersection_unexpected_ampersand(
1167 self.span_factory.create(span_start, self.reader.offset()),
1168 ));
1169 }
1170
1171 if let Some(class_set_operand) = self.parse_class_set_operand()? {
1172 body.push(class_set_operand);
1173 continue;
1174 }
1175 }
1176
1177 let span_start = self.reader.offset();
1178 return Err(diagnostics::class_set_expression_invalid_character(
1179 self.span_factory.create(span_start, self.reader.offset()),
1180 "class intersection",
1181 ));
1182 }
1183
1184 Ok((ast::CharacterClassContentsKind::Intersection, body))
1185 }
1186
1187 fn parse_class_set_subtraction(
1193 &mut self,
1194 class_set_operand: ast::CharacterClassContents,
1195 ) -> Result<(
1196 ast::CharacterClassContentsKind,
1197 Vec<ast::CharacterClassContents>,
1198 )> {
1199 let mut body = vec![];
1200 body.push(class_set_operand);
1201
1202 loop {
1203 if self.reader.peek().filter(|&cp| cp == ']' as u32).is_some() {
1204 break;
1205 }
1206
1207 if self.reader.eat2('-', '-') {
1208 if let Some(class_set_operand) = self.parse_class_set_operand()? {
1209 body.push(class_set_operand);
1210 continue;
1211 }
1212 }
1213
1214 let span_start = self.reader.offset();
1215 return Err(diagnostics::class_set_expression_invalid_character(
1216 self.span_factory.create(span_start, self.reader.offset()),
1217 "class subtraction",
1218 ));
1219 }
1220
1221 Ok((ast::CharacterClassContentsKind::Subtraction, body))
1222 }
1223
1224 fn parse_class_set_range(&mut self) -> Result<Option<ast::CharacterClassContents>> {
1229 let checkpoint = self.reader.checkpoint();
1230
1231 if let Some(class_set_character) = self.parse_class_set_character()? {
1232 if self.reader.eat('-') {
1233 if let Some(class_set_character_to) = self.parse_class_set_character()? {
1234 if class_set_character_to.value < class_set_character.value {
1238 return Err(diagnostics::character_class_range_out_of_order(
1239 class_set_character
1240 .span
1241 .with_hi(class_set_character_to.span.hi),
1242 "class set",
1243 ));
1244 }
1245
1246 return Ok(Some(ast::CharacterClassContents::CharacterClassRange(
1247 Box::new(ast::CharacterClassRange {
1248 span: class_set_character
1249 .span
1250 .with_hi(class_set_character_to.span.hi),
1251 min: class_set_character,
1252 max: class_set_character_to,
1253 }),
1254 )));
1255 }
1256 }
1257 }
1258 self.reader.rewind(checkpoint);
1259
1260 Ok(None)
1261 }
1262
1263 fn parse_class_set_operand(&mut self) -> Result<Option<ast::CharacterClassContents>> {
1273 if let Some(nested_class) = self.parse_nested_class()? {
1274 return Ok(Some(nested_class));
1275 }
1276
1277 let span_start = self.reader.offset();
1278 if self.reader.eat3('\\', 'q', '{') {
1279 let (class_string_disjunction_contents, strings) =
1280 self.parse_class_string_disjunction_contents()?;
1281
1282 if self.reader.eat('}') {
1283 return Ok(Some(ast::CharacterClassContents::ClassStringDisjunction(
1284 Box::new(ast::ClassStringDisjunction {
1285 span: self.span_factory.create(span_start, self.reader.offset()),
1286 strings,
1287 body: class_string_disjunction_contents,
1288 }),
1289 )));
1290 }
1291
1292 return Err(diagnostics::unterminated_pattern(
1293 self.span_factory.create(span_start, self.reader.offset()),
1294 "class string disjunction",
1295 ));
1296 }
1297
1298 if let Some(class_set_character) = self.parse_class_set_character()? {
1299 return Ok(Some(ast::CharacterClassContents::Character(Box::new(
1300 class_set_character,
1301 ))));
1302 }
1303
1304 Ok(None)
1305 }
1306
1307 fn parse_nested_class(&mut self) -> Result<Option<ast::CharacterClassContents>> {
1314 let span_start = self.reader.offset();
1315
1316 if self.reader.eat('[') {
1319 let negative = self.reader.eat('^');
1320 let (kind, body) = self.parse_class_contents()?;
1321
1322 if self.reader.eat(']') {
1323 let strings = PatternParser::may_contain_strings_in_class_contents(kind, &body);
1324
1325 if negative && strings {
1328 return Err(diagnostics::character_class_contents_invalid_operands(
1329 self.span_factory.create(span_start, self.reader.offset()),
1330 ));
1331 }
1332
1333 return Ok(Some(ast::CharacterClassContents::NestedCharacterClass(
1334 Box::new(ast::CharacterClass {
1335 span: self.span_factory.create(span_start, self.reader.offset()),
1336 negative,
1337 kind,
1338 strings,
1339 body,
1340 }),
1341 )));
1342 }
1343
1344 return Err(diagnostics::unterminated_pattern(
1345 self.span_factory.create(span_start, self.reader.offset()),
1346 "nested class",
1347 ));
1348 }
1349
1350 let span_start = self.reader.offset();
1352 let checkpoint = self.reader.checkpoint();
1353 if self.reader.eat('\\') {
1354 if let Some(character_class_escape) = self.parse_character_class_escape(span_start) {
1355 return Ok(Some(ast::CharacterClassContents::CharacterClassEscape(
1356 Box::new(character_class_escape),
1357 )));
1358 }
1359 if let Some(unicode_property_escape) =
1360 self.parse_character_class_escape_unicode(span_start)?
1361 {
1362 return Ok(Some(ast::CharacterClassContents::UnicodePropertyEscape(
1363 Box::new(unicode_property_escape),
1364 )));
1365 }
1366
1367 self.reader.rewind(checkpoint);
1368 }
1369
1370 Ok(None)
1371 }
1372
1373 fn parse_class_string_disjunction_contents(&mut self) -> Result<(Vec<ast::ClassString>, bool)> {
1380 let mut body = vec![];
1381 let mut strings = false;
1382
1383 loop {
1384 let class_string = self.parse_class_string()?;
1385
1386 if class_string.strings {
1388 strings = true;
1389 }
1390 body.push(class_string);
1391
1392 if !self.reader.eat('|') {
1393 break;
1394 }
1395 }
1396
1397 if body.is_empty() {
1398 strings = true;
1399 }
1400
1401 Ok((body, strings))
1402 }
1403
1404 fn parse_class_string(&mut self) -> Result<ast::ClassString> {
1414 let span_start = self.reader.offset();
1415
1416 let mut body = vec![];
1417 while let Some(class_set_character) = self.parse_class_set_character()? {
1418 body.push(class_set_character);
1419 }
1420
1421 let strings = body.len() != 1;
1423
1424 Ok(ast::ClassString {
1425 span: self.span_factory.create(span_start, self.reader.offset()),
1426 strings,
1427 body,
1428 })
1429 }
1430
1431 fn parse_class_set_character(&mut self) -> Result<Option<ast::Character>> {
1439 let span_start = self.reader.offset();
1440
1441 if let (Some(cp1), Some(cp2)) = (self.reader.peek(), self.reader.peek2()) {
1442 if !character::is_class_set_reserved_double_punctuator(cp1, cp2)
1443 && !character::is_class_set_syntax_character(cp1)
1444 {
1445 self.reader.advance();
1446
1447 return Ok(Some(ast::Character {
1448 span: self.span_factory.create(span_start, self.reader.offset()),
1449 kind: ast::CharacterKind::Symbol,
1450 value: cp1,
1451 }));
1452 }
1453 }
1454
1455 let checkpoint = self.reader.checkpoint();
1456 if self.reader.eat('\\') {
1457 if let Some(character_escape) = self.parse_character_escape(span_start)? {
1458 return Ok(Some(character_escape));
1459 }
1460
1461 if let Some(cp) = self
1462 .reader
1463 .peek()
1464 .filter(|&cp| character::is_class_set_reserved_punctuator(cp))
1465 {
1466 self.reader.advance();
1467 return Ok(Some(ast::Character {
1468 span: self.span_factory.create(span_start, self.reader.offset()),
1469 kind: ast::CharacterKind::Identifier,
1470 value: cp,
1471 }));
1472 }
1473
1474 if self.reader.eat('b') {
1475 return Ok(Some(ast::Character {
1476 span: self.span_factory.create(span_start, self.reader.offset()),
1477 kind: ast::CharacterKind::SingleEscape,
1478 value: 0x08,
1479 }));
1480 }
1481
1482 self.reader.rewind(checkpoint);
1483 }
1484
1485 Ok(None)
1486 }
1487
1488 fn parse_capturing_group(&mut self) -> Result<Option<ast::CapturingGroup>> {
1495 let span_start = self.reader.offset();
1496 let checkpoint = self.reader.checkpoint();
1497
1498 if self.reader.eat('(') {
1499 let mut group_name = None;
1500
1501 if self.reader.eat('?') {
1503 let Some(name) = self.consume_group_name()? else {
1504 self.reader.rewind(checkpoint);
1506 return Ok(None);
1507 };
1508
1509 group_name = Some(name);
1510 }
1511
1512 let disjunction = self.parse_disjunction()?;
1513 if self.reader.eat(')') {
1514 return Ok(Some(ast::CapturingGroup {
1515 span: self.span_factory.create(span_start, self.reader.offset()),
1516 name: group_name,
1517 body: disjunction,
1518 }));
1519 }
1520
1521 return Err(diagnostics::unterminated_pattern(
1522 self.span_factory.create(span_start, self.reader.offset()),
1523 "capturing group",
1524 ));
1525 }
1526
1527 Ok(None)
1528 }
1529
1530 fn parse_ignore_group(&mut self) -> Result<Option<ast::IgnoreGroup>> {
1536 let span_start = self.reader.offset();
1537
1538 if self.reader.eat2('(', '?') {
1539 let modifiers = if self.reader.peek().filter(|&cp| cp == ':' as u32).is_some() {
1540 None
1541 } else {
1542 self.parse_modifiers()?
1543 };
1544
1545 if self.reader.eat(':') {
1546 let disjunction = self.parse_disjunction()?;
1547
1548 if !self.reader.eat(')') {
1549 return Err(diagnostics::unterminated_pattern(
1550 self.span_factory.create(span_start, self.reader.offset()),
1551 "ignore group",
1552 ));
1553 }
1554
1555 return Ok(Some(ast::IgnoreGroup {
1556 span: self.span_factory.create(span_start, self.reader.offset()),
1557 modifiers,
1558 body: disjunction,
1559 }));
1560 }
1561 }
1562
1563 Ok(None)
1564 }
1565
1566 fn parse_modifiers(&mut self) -> Result<Option<ast::Modifiers>> {
1575 let span_start = self.reader.offset();
1576
1577 let mut enabling = ast::Modifier::empty();
1578 let mut disabling = ast::Modifier::empty();
1579 let mut duplicate = false;
1580
1581 while self
1583 .reader
1584 .peek()
1585 .filter(|&cp| cp == ':' as u32 || cp == '-' as u32)
1586 .is_none()
1587 {
1588 if self.reader.eat('i') {
1589 if enabling.contains(ast::Modifier::I) {
1590 duplicate = true;
1591 }
1592 enabling |= ast::Modifier::I;
1593 continue;
1594 }
1595 if self.reader.eat('m') {
1596 if enabling.contains(ast::Modifier::M) {
1597 duplicate = true;
1598 }
1599 enabling |= ast::Modifier::M;
1600 continue;
1601 }
1602 if self.reader.eat('s') {
1603 if enabling.contains(ast::Modifier::S) {
1604 duplicate = true;
1605 }
1606 enabling |= ast::Modifier::S;
1607 continue;
1608 }
1609
1610 return Err(diagnostics::unknown_modifiers(
1611 self.span_factory.create(span_start, self.reader.offset()),
1612 ));
1613 }
1614
1615 if self.reader.eat('-') {
1617 while self.reader.peek().filter(|&cp| cp == ':' as u32).is_none() {
1618 if self.reader.eat('i') {
1619 if disabling.contains(ast::Modifier::I) {
1620 duplicate = true;
1621 }
1622 disabling |= ast::Modifier::I;
1623 continue;
1624 }
1625 if self.reader.eat('m') {
1626 if disabling.contains(ast::Modifier::M) {
1627 duplicate = true;
1628 }
1629 disabling |= ast::Modifier::M;
1630 continue;
1631 }
1632 if self.reader.eat('s') {
1633 if disabling.contains(ast::Modifier::S) {
1634 duplicate = true;
1635 }
1636 disabling |= ast::Modifier::S;
1637 continue;
1638 }
1639
1640 return Err(diagnostics::unknown_modifiers(
1641 self.span_factory.create(span_start, self.reader.offset()),
1642 ));
1643 }
1644 }
1645
1646 if enabling.is_empty() && disabling.is_empty()
1659 || duplicate
1660 || [ast::Modifier::I, ast::Modifier::M, ast::Modifier::S]
1661 .iter()
1662 .any(|&modifier| enabling.contains(modifier) && disabling.contains(modifier))
1663 {
1664 return Err(diagnostics::invalid_modifiers(
1665 self.span_factory.create(span_start, self.reader.offset()),
1666 ));
1667 }
1668
1669 Ok(Some(ast::Modifiers {
1670 span: self.span_factory.create(span_start, self.reader.offset()),
1671 enabling,
1672 disabling,
1673 }))
1674 }
1675
1676 fn consume_quantifier(&mut self) -> Result<Option<((u64, Option<u64>), bool)>> {
1693 const MAX_QUANTIFIER: u64 = 9_007_199_254_740_991; let is_greedy = |reader: &mut Reader| !reader.eat('?');
1695
1696 if self.reader.eat('*') {
1697 return Ok(Some(((0, None), is_greedy(&mut self.reader))));
1698 }
1699 if self.reader.eat('+') {
1700 return Ok(Some(((1, None), is_greedy(&mut self.reader))));
1701 }
1702 if self.reader.eat('?') {
1703 return Ok(Some(((0, Some(1)), is_greedy(&mut self.reader))));
1704 }
1705
1706 let span_start = self.reader.offset();
1707 let checkpoint = self.reader.checkpoint();
1708 if self.reader.eat('{') {
1709 if let Some(min) = self.consume_decimal_digits()? {
1710 if self.reader.eat('}') {
1711 if MAX_QUANTIFIER < min {
1712 return Err(diagnostics::too_large_number_in_braced_quantifier(
1713 self.span_factory.create(span_start, self.reader.offset()),
1714 ));
1715 }
1716
1717 return Ok(Some(((min, Some(min)), is_greedy(&mut self.reader))));
1718 }
1719
1720 if self.reader.eat(',') {
1721 if self.reader.eat('}') {
1722 if MAX_QUANTIFIER < min {
1723 return Err(diagnostics::too_large_number_in_braced_quantifier(
1724 self.span_factory.create(span_start, self.reader.offset()),
1725 ));
1726 }
1727
1728 return Ok(Some(((min, None), is_greedy(&mut self.reader))));
1729 }
1730
1731 if let Some(max) = self.consume_decimal_digits()? {
1732 if self.reader.eat('}') {
1733 if max < min {
1734 return Err(diagnostics::braced_quantifier_out_of_order(
1738 self.span_factory.create(span_start, self.reader.offset()),
1739 ));
1740 }
1741 if MAX_QUANTIFIER < min || MAX_QUANTIFIER < max {
1742 return Err(diagnostics::too_large_number_in_braced_quantifier(
1743 self.span_factory.create(span_start, self.reader.offset()),
1744 ));
1745 }
1746
1747 return Ok(Some(((min, Some(max)), is_greedy(&mut self.reader))));
1748 }
1749 }
1750 }
1751 }
1752
1753 self.reader.rewind(checkpoint);
1754 }
1755
1756 Ok(None)
1757 }
1758
1759 fn consume_decimal_escape(&mut self) -> Result<Option<u32>> {
1764 let checkpoint = self.reader.checkpoint();
1765
1766 if let Some(index) = self.consume_decimal_digits()? {
1767 if index != 0 {
1769 #[expect(clippy::cast_possible_truncation)]
1770 return Ok(Some(index as u32));
1771 }
1772
1773 self.reader.rewind(checkpoint);
1774 }
1775
1776 Ok(None)
1777 }
1778
1779 fn consume_decimal_digits(&mut self) -> Result<Option<u64>> {
1787 let span_start = self.reader.offset();
1788 let checkpoint = self.reader.checkpoint();
1789
1790 let mut value: u64 = 0;
1791 while let Some(cp) = self
1792 .reader
1793 .peek()
1794 .filter(|&cp| character::is_decimal_digit(cp))
1795 {
1796 #[expect(clippy::cast_lossless)]
1798 let d = (cp - '0' as u32) as u64;
1799
1800 if let Some(v) = value.checked_mul(10).and_then(|v| v.checked_add(d)) {
1803 value = v;
1804 self.reader.advance();
1805 } else {
1806 return Err(diagnostics::too_large_number_digits(
1807 self.span_factory.create(span_start, self.reader.offset()),
1808 "decimal",
1809 ));
1810 }
1811 }
1812
1813 if self.reader.checkpoint() != checkpoint {
1814 return Ok(Some(value));
1815 }
1816
1817 Ok(None)
1818 }
1819
1820 fn consume_unicode_property_value_expression(
1827 &mut self,
1828 ) -> Result<Option<(SpanAtom, Option<SpanAtom>, bool)>> {
1829 let checkpoint = self.reader.checkpoint();
1830
1831 if let Some(name) = self.consume_unicode_property_name() {
1833 if self.reader.eat('=') {
1834 let span_start = self.reader.offset();
1835
1836 if let Some(value) = self.consume_unicode_property_value() {
1837 if !unicode_property::is_valid_unicode_property(&name, &value) {
1847 return Err(diagnostics::invalid_unicode_property(
1848 self.span_factory.create(span_start, self.reader.offset()),
1849 "name",
1850 ));
1851 }
1852
1853 return Ok(Some((name, Some(value), false)));
1854 }
1855 }
1856 }
1857 self.reader.rewind(checkpoint);
1858
1859 let span_start = self.reader.offset();
1860 if let Some(name_or_value) = self.consume_unicode_property_value() {
1862 if unicode_property::is_valid_unicode_property("General_Category", &name_or_value) {
1870 return Ok(Some((
1871 atom!("General_Category"),
1872 Some(name_or_value),
1873 false,
1874 )));
1875 }
1876 if unicode_property::is_valid_lone_unicode_property(&name_or_value) {
1877 return Ok(Some((name_or_value, None, false)));
1878 }
1879 if unicode_property::is_valid_lone_unicode_property_of_strings(&name_or_value) {
1885 if !self.state.unicode_sets_mode {
1886 return Err(diagnostics::invalid_unicode_property_of_strings(
1887 self.span_factory.create(span_start, self.reader.offset()),
1888 name_or_value.as_str(),
1889 ));
1890 }
1891
1892 return Ok(Some((name_or_value, None, true)));
1893 }
1894
1895 return Err(diagnostics::invalid_unicode_property(
1896 self.span_factory.create(span_start, self.reader.offset()),
1897 "name and/or value",
1898 ));
1899 }
1900
1901 Ok(None)
1902 }
1903
1904 fn consume_unicode_property_name(&mut self) -> Option<SpanAtom> {
1905 let span_start = self.reader.offset();
1906
1907 let checkpoint = self.reader.checkpoint();
1908 while character::is_unicode_property_name_character(self.reader.peek()?) {
1909 self.reader.advance();
1910 }
1911
1912 if checkpoint == self.reader.checkpoint() {
1913 return None;
1914 }
1915
1916 Some(self.reader.atom(span_start, self.reader.offset()))
1917 }
1918
1919 fn consume_unicode_property_value(&mut self) -> Option<SpanAtom> {
1920 let span_start = self.reader.offset();
1921
1922 let checkpoint = self.reader.checkpoint();
1923 while character::is_unicode_property_value_character(self.reader.peek()?) {
1924 self.reader.advance();
1925 }
1926
1927 if checkpoint == self.reader.checkpoint() {
1928 return None;
1929 }
1930
1931 Some(self.reader.atom(span_start, self.reader.offset()))
1932 }
1933
1934 fn consume_group_name(&mut self) -> Result<Option<SpanAtom>> {
1939 let span_start = self.reader.offset();
1940
1941 if !self.reader.eat('<') {
1942 return Ok(None);
1943 }
1944
1945 if let Some(group_name) = self.consume_reg_exp_idenfigier_name()? {
1946 if self.reader.eat('>') {
1947 return Ok(Some(group_name));
1948 }
1949 }
1950
1951 Err(diagnostics::unterminated_pattern(
1952 self.span_factory.create(span_start, self.reader.offset()),
1953 "capturing group name",
1954 ))
1955 }
1956
1957 fn consume_reg_exp_idenfigier_name(&mut self) -> Result<Option<SpanAtom>> {
1963 let span_start = self.reader.offset();
1964
1965 if self.consume_reg_exp_idenfigier_start()?.is_some() {
1966 while self.consume_reg_exp_idenfigier_part()?.is_some() {}
1967 return Ok(Some(self.reader.atom(span_start, self.reader.offset())));
1968 }
1969
1970 Ok(None)
1971 }
1972
1973 fn consume_reg_exp_idenfigier_start(&mut self) -> Result<Option<u32>> {
1980 if let Some(cp) = self
1981 .reader
1982 .peek()
1983 .filter(|&cp| character::is_identifier_start_char(cp))
1984 {
1985 self.reader.advance();
1986 return Ok(Some(cp));
1987 }
1988
1989 let span_start = self.reader.offset();
1990 if self.reader.eat('\\') {
1991 if let Some(cp) = self.consume_reg_exp_unicode_escape_sequence(true)? {
1992 if !character::is_identifier_start_char(cp) {
1997 return Err(diagnostics::invalid_unicode_escape_sequence(
1998 self.span_factory.create(span_start, self.reader.offset()),
1999 ));
2000 }
2001
2002 return Ok(Some(cp));
2003 }
2004
2005 return Err(diagnostics::invalid_unicode_escape_sequence(
2006 self.span_factory.create(span_start, self.reader.offset()),
2007 ));
2008 }
2009
2010 if !self.state.unicode_mode {
2011 let span_start = self.reader.offset();
2012
2013 if let Some(lead_surrogate) = self
2014 .reader
2015 .peek()
2016 .filter(|&cp| surrogate_pair::is_lead_surrogate(cp))
2017 {
2018 if let Some(trail_surrogate) = self
2019 .reader
2020 .peek2()
2021 .filter(|&cp| surrogate_pair::is_trail_surrogate(cp))
2022 {
2023 self.reader.advance();
2024 self.reader.advance();
2025 let cp =
2026 surrogate_pair::combine_surrogate_pair(lead_surrogate, trail_surrogate);
2027
2028 if !character::is_unicode_id_start(cp) {
2033 return Err(diagnostics::invalid_surrogate_pair(
2034 self.span_factory.create(span_start, self.reader.offset()),
2035 ));
2036 }
2037
2038 return Ok(Some(cp));
2039 }
2040 }
2041 }
2042
2043 Ok(None)
2044 }
2045
2046 fn consume_reg_exp_idenfigier_part(&mut self) -> Result<Option<u32>> {
2053 if let Some(cp) = self
2054 .reader
2055 .peek()
2056 .filter(|&cp| character::is_identifier_part_char(cp))
2057 {
2058 self.reader.advance();
2059 return Ok(Some(cp));
2060 }
2061
2062 let span_start = self.reader.offset();
2063 if self.reader.eat('\\') {
2064 if let Some(cp) = self.consume_reg_exp_unicode_escape_sequence(true)? {
2065 if !character::is_identifier_part_char(cp) {
2070 return Err(diagnostics::invalid_unicode_escape_sequence(
2071 self.span_factory.create(span_start, self.reader.offset()),
2072 ));
2073 }
2074
2075 return Ok(Some(cp));
2076 }
2077
2078 return Err(diagnostics::invalid_unicode_escape_sequence(
2079 self.span_factory.create(span_start, self.reader.offset()),
2080 ));
2081 }
2082
2083 if !self.state.unicode_mode {
2084 let span_start = self.reader.offset();
2085
2086 if let Some(lead_surrogate) = self
2087 .reader
2088 .peek()
2089 .filter(|&cp| surrogate_pair::is_lead_surrogate(cp))
2090 {
2091 if let Some(trail_surrogate) = self
2092 .reader
2093 .peek2()
2094 .filter(|&cp| surrogate_pair::is_trail_surrogate(cp))
2095 {
2096 self.reader.advance();
2097 self.reader.advance();
2098
2099 let cp =
2100 surrogate_pair::combine_surrogate_pair(lead_surrogate, trail_surrogate);
2101 if !character::is_unicode_id_continue(cp) {
2105 return Err(diagnostics::invalid_surrogate_pair(
2106 self.span_factory.create(span_start, self.reader.offset()),
2107 ));
2108 }
2109
2110 return Ok(Some(cp));
2111 }
2112 }
2113 }
2114
2115 Ok(None)
2116 }
2117
2118 fn consume_reg_exp_unicode_escape_sequence(
2128 &mut self,
2129 unicode_mode: bool,
2130 ) -> Result<Option<u32>> {
2131 let span_start = self.reader.offset();
2132 let checkpoint = self.reader.checkpoint();
2133
2134 if self.reader.eat('u') {
2135 if unicode_mode {
2136 let checkpoint = self.reader.checkpoint();
2137
2138 if let Some(lead_surrogate) = self
2140 .consume_fixed_hex_digits(4)
2141 .filter(|&cp| surrogate_pair::is_lead_surrogate(cp))
2142 {
2143 if self.reader.eat2('\\', 'u') {
2144 if let Some(trail_surrogate) = self
2145 .consume_fixed_hex_digits(4)
2146 .filter(|&cp| surrogate_pair::is_trail_surrogate(cp))
2147 {
2148 return Ok(Some(surrogate_pair::combine_surrogate_pair(
2149 lead_surrogate,
2150 trail_surrogate,
2151 )));
2152 }
2153 }
2154 }
2155 self.reader.rewind(checkpoint);
2156
2157 if let Some(lead_surrogate) = self
2159 .consume_fixed_hex_digits(4)
2160 .filter(|&cp| surrogate_pair::is_lead_surrogate(cp))
2161 {
2162 return Ok(Some(lead_surrogate));
2163 }
2164 self.reader.rewind(checkpoint);
2165
2166 if let Some(trail_surrogate) = self
2168 .consume_fixed_hex_digits(4)
2169 .filter(|&cp| surrogate_pair::is_trail_surrogate(cp))
2170 {
2171 return Ok(Some(trail_surrogate));
2172 }
2173 self.reader.rewind(checkpoint);
2174 }
2175
2176 if let Some(hex_digits) = self.consume_fixed_hex_digits(4) {
2178 return Ok(Some(hex_digits));
2179 }
2180
2181 if unicode_mode {
2183 let checkpoint = self.reader.checkpoint();
2184
2185 if self.reader.eat('{') {
2186 if let Some(hex_digits) = self
2187 .consume_hex_digits()?
2188 .filter(|&cp| character::is_valid_unicode(cp))
2189 {
2190 if self.reader.eat('}') {
2191 return Ok(Some(hex_digits));
2192 }
2193 }
2194 }
2195 self.reader.rewind(checkpoint);
2196 }
2197
2198 if self.state.unicode_mode {
2199 return Err(diagnostics::invalid_unicode_escape_sequence(
2200 self.span_factory.create(span_start, self.reader.offset()),
2201 ));
2202 }
2203
2204 self.reader.rewind(checkpoint);
2205 }
2206
2207 Ok(None)
2208 }
2209
2210 fn consume_legacy_octal_escape_sequence(&mut self) -> Option<u32> {
2219 if let Some(first) = self.consume_octal_digit() {
2220 if first == 0
2222 && self
2223 .reader
2224 .peek()
2225 .filter(|&cp| cp == '8' as u32 || cp == '9' as u32)
2226 .is_some()
2227 {
2228 return Some(first);
2229 }
2230
2231 if let Some(second) = self.consume_octal_digit() {
2232 if let Some(third) = self.consume_octal_digit() {
2233 if first <= 3 {
2235 return Some(first * 64 + second * 8 + third);
2236 }
2237 }
2238
2239 return Some(first * 8 + second);
2242 }
2243
2244 return Some(first);
2246 }
2247
2248 None
2249 }
2250
2251 fn consume_octal_digit(&mut self) -> Option<u32> {
2252 let cp = self.reader.peek()?;
2253
2254 if character::is_octal_digit(cp) {
2255 self.reader.advance();
2256 return Some(cp - '0' as u32);
2258 }
2259
2260 None
2261 }
2262
2263 fn consume_identity_escape(&mut self) -> Option<u32> {
2275 let cp = self.reader.peek()?;
2276
2277 if self.state.unicode_mode {
2278 if character::is_syntax_character(cp) || cp == '/' as u32 {
2279 self.reader.advance();
2280 return Some(cp);
2281 }
2282 return None;
2283 }
2284
2285 if self.state.named_capture_groups {
2286 if cp != 'c' as u32 && cp != 'k' as u32 {
2287 self.reader.advance();
2288 return Some(cp);
2289 }
2290 return None;
2291 }
2292
2293 if cp != 'c' as u32 {
2294 self.reader.advance();
2295 return Some(cp);
2296 }
2297
2298 None
2299 }
2300
2301 fn consume_extended_pattern_character(&mut self) -> Option<u32> {
2306 let cp = self.reader.peek()?;
2307
2308 if cp == '^' as u32
2309 || cp == '$' as u32
2310 || cp == '\\' as u32
2311 || cp == '.' as u32
2312 || cp == '*' as u32
2313 || cp == '+' as u32
2314 || cp == '?' as u32
2315 || cp == '(' as u32
2316 || cp == ')' as u32
2317 || cp == '[' as u32
2318 || cp == '|' as u32
2319 {
2320 return None;
2321 }
2322
2323 self.reader.advance();
2324 Some(cp)
2325 }
2326
2327 fn consume_hex_digits(&mut self) -> Result<Option<u32>> {
2328 let span_start = self.reader.offset();
2329 let checkpoint = self.reader.checkpoint();
2330
2331 let mut value: u32 = 0;
2332 while let Some(hex) = self.reader.peek().and_then(character::map_hex_digit) {
2333 if let Some(v) = value.checked_mul(16).and_then(|v| v.checked_add(hex)) {
2335 value = v;
2336 self.reader.advance();
2337 } else {
2338 return Err(diagnostics::too_large_number_digits(
2339 self.span_factory.create(span_start, self.reader.offset()),
2340 "hex",
2341 ));
2342 }
2343 }
2344
2345 if self.reader.checkpoint() != checkpoint {
2346 return Ok(Some(value));
2347 }
2348
2349 Ok(None)
2350 }
2351
2352 fn consume_fixed_hex_digits(&mut self, len: usize) -> Option<u32> {
2353 let checkpoint = self.reader.checkpoint();
2354
2355 let mut value = 0;
2356 for _ in 0..len {
2357 let Some(hex) = self.reader.peek().and_then(character::map_hex_digit) else {
2358 self.reader.rewind(checkpoint);
2359 return None;
2360 };
2361
2362 value = (16 * value) + hex;
2363 self.reader.advance();
2364 }
2365
2366 Some(value)
2367 }
2368
2369 fn may_contain_strings_in_class_contents(
2372 kind: ast::CharacterClassContentsKind,
2373 body: &[ast::CharacterClassContents],
2374 ) -> bool {
2375 let may_contain_strings = |item: &ast::CharacterClassContents| match item {
2376 ast::CharacterClassContents::UnicodePropertyEscape(item) => item.strings,
2381 ast::CharacterClassContents::ClassStringDisjunction(item) => item.strings,
2387 ast::CharacterClassContents::NestedCharacterClass(item) => item.strings,
2390 _ => false,
2391 };
2392
2393 match kind {
2394 ast::CharacterClassContentsKind::Union => body.iter().any(may_contain_strings),
2399 ast::CharacterClassContentsKind::Intersection => body.iter().all(may_contain_strings),
2404 ast::CharacterClassContentsKind::Subtraction => {
2409 body.iter().next().is_some_and(may_contain_strings)
2410 }
2411 }
2412 }
2413}