swc_ecma_regexp/parser/
mod.rs

1mod flags_parser;
2mod parser_impl;
3mod pattern_parser;
4mod reader;
5mod span_factory;
6
7pub use parser_impl::{ConstructorParser, LiteralParser};
8
9#[cfg(test)]
10mod test {
11
12    use crate::{ConstructorParser, LiteralParser, Options};
13
14    #[test]
15    fn should_pass() {
16        for (pattern_text, flags_text) in &[
17            ("", ""),
18            ("a", ""),
19            ("a+", ""),
20            ("a*", ""),
21            ("a?", ""),
22            ("^$^$^$", ""),
23            ("(?=a){1}", ""),
24            ("(?!a){1}", ""),
25            ("a{1}", ""),
26            ("a{1", ""),
27            ("a|{", ""),
28            ("a{", ""),
29            ("a{,", ""),
30            ("a{1,", ""),
31            ("a{1,}", ""),
32            ("a{1,2}", ""),
33            ("x{9007199254740991}", ""),
34            ("x{9007199254740991,9007199254740991}", ""),
35            ("a|b", ""),
36            ("a|b|c", ""),
37            ("a|b+?|c", ""),
38            ("a+b*?c{1}d{2,}e{3,4}?", ""),
39            (r"^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$", ""),
40            ("a.b..", ""),
41            (r"\d\D\s\S\w\W", ""),
42            (r"\x", ""),
43            (
44                r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{Basic_Emoji}",
45                "",
46            ),
47            (
48                r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{P}",
49                "u",
50            ),
51            (r"^\p{General_Category=cntrl}+$", "u"),
52            (r"\p{Basic_Emoji}", "v"),
53            (r"\n\cM\0\x41\u1f60\.\/", ""),
54            (r"\c0", ""),
55            (r"\0", ""),
56            (r"\0", "u"),
57            (r"\u", ""),
58            (r"\u{", ""),
59            (r"\u{}", ""),
60            (r"\u{0}", ""),
61            (r"\u{1f600}", ""),
62            (r"\u{1f600}", "u"),
63            ("(?:abc)", ""),
64            (r"(?<\u{1d49c}>.)\x1f", ""),
65            ("a]", ""),
66            ("a}", ""),
67            ("]", ""),
68            ("[]", ""),
69            ("[a]", ""),
70            ("[ab]", ""),
71            ("[a-b]", ""),
72            ("[-]", ""),
73            ("[a-]", ""),
74            ("[-a]", ""),
75            ("[-a-]", ""),
76            (r"[a\-b]", ""),
77            (r"[-a-b]", ""),
78            (r"[a-b-]", ""),
79            (r"[a\-b-]", ""),
80            (r"[\[\]\-]", ""),
81            ("[a-z0-9]", ""),
82            ("[a-a]", ""),
83            (r"[\d-\D]", ""),
84            (r"^([\ud801[\udc28-\udc4f])$", ""),
85            (r"[a-c]]", ""),
86            (
87                r"[ϗϙϛϝϟϡϣϥϧϩϫϭϯ-ϳϵϸϻ-ϼа-џѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎ-ӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣա-ևᴀ-ᴫᵢ-ᵷᵹ-ᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕ-ẝẟạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹỻỽỿ-ἇἐ-ἕἠ-ἧἰ-ἷὀ-ὅὐ-ὗὠ-ὧὰ]",
88                "",
89            ),
90            (r"[a-z0-9[.\\]]", "v"),
91            (r"[a&&b&&c]", "v"),
92            (r"[a--b--c]", "v"),
93            (r"[[a-z]--b--c]", "v"),
94            (r"[[[[[[[[[[[[[[[[[[[[[[[[a]]]]]]]]]]]]]]]]]]]]]]]]", "v"),
95            (r"[\q{}\q{a}\q{bc}\q{d|e|f}\q{|||}]", "v"),
96            (r"(?<foo>A)\k<foo>", ""),
97            (r"(?<!a>)\k<a>", ""),
98            (r"\k", ""),
99            (r"\k<4>", ""),
100            (r"\k<a>", ""),
101            (r"(?<a>)\k<a>", ""),
102            (r"(?<a>)\k<a>", "u"),
103            (r"\1", ""),
104            (r"\1()", ""),
105            (r"\1()", "u"),
106            (r"(?<n1>..)(?<n2>..)", ""),
107            // ES2025 ---
108            // Duplicate named capturing groups
109            (r"(?<n1>..)|(?<n1>..)", ""),
110            (r"(?<year>[0-9]{4})-[0-9]{2}|[0-9]{2}-(?<year>[0-9]{4})", ""),
111            (r"(?:(?<a>x)|(?<a>y))\k<a>", ""),
112            (r"(?<x>a)|(?<x>b)", ""),
113            (r"(?:(?<x>a)|(?<y>a)(?<x>b))(?:(?<z>c)|(?<z>d))", ""),
114            (r"(?:(?<x>a)|(?<x>b))\\k<x>", ""),
115            (r"(?:(?:(?<x>a)|(?<x>b)|c)\\k<x>){2}", ""),
116            (r"(?:(?:(?<x>a)|(?<x>b))\\k<x>){2}", ""),
117            (r"(?:(?:(?<x>a)\\k<x>|(?<x>b)\\k<x>)|(?:))\\k<x>", ""),
118            (r"(?:(?:(?<x>a\\k<x>)|(?<x>b\\k<x>))|(?:))\\k<x>", ""),
119            // Modifiers
120            (r"(?:.)", ""),
121            (r"(?s:.)", ""),
122            (r"(?ism:.)", ""),
123            (r"(?-s:.)", ""),
124            (r"(?-smi:.)", ""),
125            (r"(?s-im:.)", ""),
126            (r"(?si-m:.)", ""),
127            (r"(?im-s:.)", "v"),
128            (r"(?ims-:.)", ""),
129        ] {
130            let res =
131                LiteralParser::new(pattern_text, Some(flags_text), Options::default()).parse();
132            if let Err(err) = res {
133                panic!("Failed to parse /{pattern_text}/{flags_text}\n💥 {err}");
134            }
135        }
136    }
137
138    #[test]
139    fn should_fail() {
140        for (pattern_text, flags_text) in &[
141            ("a)", ""),
142            (r"a\", ""),
143            ("a]", "u"),
144            ("a}", "u"),
145            ("a|+", ""),
146            ("a|{", "u"),
147            ("a{", "u"),
148            ("a{1", "u"),
149            ("a{1,", "u"),
150            ("a{,", "u"),
151            ("x{9007199254740992}", ""),
152            ("x{9007199254740991,9007199254740992}", ""),
153            ("x{99999999999999999999999999999999999999999999999999}", ""),
154            (r"\99999999999999999999999999999999999999999999999999", ""),
155            (r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", "u"),
156            ("(?=a", ""),
157            ("(?<!a", ""),
158            (r"\c0", "u"),
159            (r"\xa", "u"),
160            (r"a\u", "u"),
161            (r"\p{Emoji_Presentation", "u"),
162            (r"\p{Script=", "u"),
163            (r"\ka", "u"),
164            (r"\k", "u"),
165            (r"\k<", "u"),
166            (r"\k<>", "u"),
167            (r"\k<4>", "u"),
168            (r"\k<a", "u"),
169            (r"\1", "u"),
170            (r"\k<a>", "u"),
171            ("a(?:", ""),
172            ("(", ""),
173            (")", "v"),
174            ("(a", ""),
175            ("(?<a>", ""),
176            ("(?<", ""),
177            (r"(?<a\>.)", ""),
178            (r"(?<a\>.)", "u"),
179            (r"(?<\>.)", ""),
180            (r"(?<\>.)", "u"),
181            ("(?)", ""),
182            ("(?=a){1}", "u"),
183            ("(?!a){1}", "u"),
184            (r"[\d-\D]", "u"),
185            ("[", ""),
186            ("[", "v"),
187            ("[[", "v"),
188            ("[[]", "v"),
189            ("[z-a]", ""),
190            (r"[a-c]]", "u"),
191            (
192                r"^([a-zªµºß-öø-ÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ-ĸĺļľŀłńņň-ʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżž-ƀƃƅƈƌ-ƍƒƕƙ-ƛƞơƣƥƨƪ-ƫƭưƴƶƹ-ƺƽ-ƿdžljnjǎǐǒǔǖǘǚǜ-ǝǟǡǣǥǧǩǫǭǯ-ǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳ-ȹȼȿ-ɀɂɇɉɋɍɏ-ʓʕ-ʯͱͳͷͻ-ͽΐά-ώϐ-ϑϕ-ϗϙϛϝϟϡϣϥϧϩϫϭϯ-ϳϵϸϻ-ϼа-џѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎ-ӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣա-ևᴀ-ᴫᵢ-ᵷᵹ-ᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕ-ẝẟạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹỻỽỿ-ἇἐ-ἕἠ-ἧἰ-ἷὀ-ὅὐ-ὗὠ-ὧὰ-ώᾀ-ᾇᾐ-ᾗᾠ-ᾧᾰ-ᾴᾶ-ᾷιῂ-ῄῆ-ῇῐ-ΐῖ-ῗῠ-ῧῲ-ῴῶ-ῷⁱⁿℊℎ-ℏℓℯℴℹℼ-ℽⅆ-ⅉⅎↄⰰ-ⱞⱡⱥ-ⱦⱨⱪⱬⱱⱳ-ⱴⱶ-ⱼⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣ-ⳤⴀ-ⴥꙁꙃꙅꙇꙉꙋꙍꙏꙑꙓꙕꙗꙙꙛꙝꙟꙣꙥꙧꙩꙫꙭꚁꚃꚅꚇꚉꚋꚍꚏꚑꚓꚕꚗꜣꜥꜧꜩꜫꜭꜯ-ꜱꜳꜵꜷꜹꜻꜽꜿꝁꝃꝅꝇꝉꝋꝍꝏꝑꝓꝕꝗꝙꝛꝝꝟꝡꝣꝥꝧꝩꝫꝭꝯꝱ-ꝸꝺꝼꝿꞁꞃꞅꞇꞌff-stﬓ-ﬗa-z]|\ud801[\udc28-\udc4f]|\ud835[\udc1a-\udc33\udc4e-\udc54\udc56-\udc67\udc82-\udc9b\udcb6-\udcb9\udcbb\udcbd-\udcc3\udcc5-\udccf\udcea-\udd03\udd1e-\udd37\udd52-\udd6b\udd86-\udd9f\uddba-\uddd3\uddee-\ude07\ude22-\ude3b\ude56-\ude6f\ude8a-\udea5\udec2-\udeda\udedc-\udee1\udefc-\udf14\udf16-\udf1b\udf36-\udf4e\udf50-\udf55\udf70-\udf88\udf8a-\udf8f\udfaa-\udfc2\udfc4-\udfc9\udfcb])$",
193                "",
194            ),
195            (r"[[\d-\D]]", "v"),
196            (r"[a&&b--c]", "v"),
197            (r"[a--b&&c]", "v"),
198            (r"[\q{]", "v"),
199            (r"[\q{\a}]", "v"),
200            // ES2025 ---
201            // Duplicate named capturing groups
202            (r"(?<n>.)(?<n>.)", ""),
203            (r"(?<n>.(?<n>..))", "u"),
204            ("(?<n>)|(?<n>)(?<n>)", ""),
205            ("(((((((?<n>.)))))))(?<n>)", ""),
206            ("(?:(?<x>a)|(?<x>b))(?<x>c)", ""),
207            ("(?<x>a)(?:(?<x>b)|(?<x>c))", ""),
208            ("(?:(?:(?<x>a)|(?<x>b)))(?<x>c)", ""),
209            ("(?:(?:(?<x>a)|(?<x>b))|(?:))(?<x>c)", ""),
210            // Modifiers
211            (r"(?a:.)", ""),
212            (r"(?-S:.)", ""),
213            (r"(?-:.)", ""),
214            (r"(?iM:.)", ""),
215            (r"(?imms:.)", ""),
216            (r"(?-sI:.)", ""),
217            (r"(?ii-s:.)", ""),
218            (r"(?i-msm:.)", ""),
219            (r"(?i", ""),
220            (r"(?i-", ""),
221            (r"(?i-s", ""),
222        ] {
223            assert!(
224                LiteralParser::new(pattern_text, Some(flags_text), Options::default())
225                    .parse()
226                    .is_err(),
227                "/{pattern_text}/{flags_text} should fail to parse, but passed!"
228            );
229        }
230    }
231
232    #[test]
233    fn should_fail_early_errors() {
234        for (pattern_text, flags_text, is_err) in &[
235            // No tests for 4,294,967,295 left parens
236            (r"(?<n>..)(?<n>..)", "", true),
237            (r"a{2,1}", "", true),
238            (r"(?<a>)\k<n>", "", true),
239            (r"()\2", "u", true),
240            (r"[a-\d]", "u", true),
241            (r"[\d-z]", "u", true),
242            (r"[\d-\d]", "u", true),
243            (r"[z-a]", "", true),
244            (r"\u{110000}", "u", true),
245            (r"(?<\uD800\uDBFF>)", "", true),
246            (r"\u{0}\u{110000}", "u", true),
247            (r"(?<a\uD800\uDBFF>)", "", true),
248            (r"\p{Foo=Bar}", "u", true),
249            (r"\p{Foo}", "u", true),
250            (r"\p{Basic_Emoji}", "u", true),
251            (r"\P{Basic_Emoji}", "v", true),
252            (r"[^\p{Basic_Emoji}]", "v", true),
253            (r"[[^\p{Basic_Emoji}]]", "v", true),
254            (r"[^\q{}]", "v", true),
255            (r"[[^\q{}]]", "v", true),
256            (r"[[^\q{ng}]]", "v", true),
257            (r"[[^\q{a|}]]", "v", true),
258            (r"[[^\q{ng}\q{o|k}]]", "v", true),
259            (r"[[^\q{o|k}\q{ng}\q{o|k}]]", "v", true),
260            (r"[[^\q{o|k}\q{o|k}\q{ng}]]", "v", true),
261            (r"[[^\q{}&&\q{ng}]]", "v", true),
262            (r"[[^\q{ng}&&\q{o|k}]]", "v", false),
263            (r"[[^\q{ng}&&\q{o|k}&&\q{ng}]]", "v", false),
264            (r"[[^\q{ng}--\q{o|k}]]", "v", true),
265            (r"[[^\q{o|k}--\q{ng}]]", "v", false),
266            (r"[[z-a]]", "v", true),
267            (r"[[[[[^[[[[\q{ng}]]]]]]]]]", "v", true),
268            (r"[^[[[[[[[[[[[[[[[[\q{ng}]]]]]]]]]]]]]]]]]", "v", true),
269            // ES2025 ---
270            // Duplicated named capture groups
271            ("(?:(?<x>a)|(?<x>b))(?<x>c)", "", true),
272            ("(?:(?<x>a)|(?<x>b))(?<X>c)", "", false),
273            ("(?<x>a)(?:(?<x>b)|(?<x>c))", "", true),
274            ("(?<x>a)|(?:(?<x>b)|(?<x>c))", "", false),
275            // Modifiers
276            (r"(?ii:.)", "", true),
277            (r"(?-ss:.)", "", true),
278            (r"(?im-im:.)", "", true),
279        ] {
280            assert_eq!(
281                LiteralParser::new(pattern_text, Some(flags_text), Options::default())
282                    .parse()
283                    .is_err(),
284                *is_err,
285                "/{pattern_text}/{flags_text} should fail with early error, but passed!"
286            );
287        }
288    }
289
290    #[test]
291    fn should_handle_empty() {
292        let pattern1 = LiteralParser::new("", None, Options::default())
293            .parse()
294            .unwrap();
295        let pattern2 = ConstructorParser::new("''", None, Options::default())
296            .parse()
297            .unwrap();
298
299        assert_eq!(pattern1.body.body[0].body.len(), 1);
300        assert_eq!(pattern2.body.body[0].body.len(), 1);
301    }
302
303    #[test]
304    fn should_handle_unicode() {
305        let source_text = "このEmoji🥹の数が変わる";
306
307        for (flags_text, expected) in [(None, 15), (Some("u"), 14), (Some("v"), 14)] {
308            let pattern = LiteralParser::new(source_text, flags_text, Options::default())
309                .parse()
310                .unwrap();
311            assert_eq!(pattern.body.body[0].body.len(), expected);
312        }
313    }
314
315    #[test]
316    fn span_offset() {
317        let pattern_text = "Adjust span but should have no side effect for parsing";
318        let ret1 = LiteralParser::new(
319            pattern_text,
320            None,
321            Options {
322                pattern_span_offset: 0,
323                flags_span_offset: 0,
324            },
325        )
326        .parse()
327        .unwrap();
328        let ret2 = LiteralParser::new(
329            pattern_text,
330            None,
331            Options {
332                pattern_span_offset: 123,
333                flags_span_offset: 456,
334            },
335        )
336        .parse()
337        .unwrap();
338
339        assert_ne!(ret1.span, ret2.span);
340        assert_eq!(ret1.to_string(), ret2.to_string());
341    }
342
343    #[test]
344    fn string_literal() {
345        let source_text = r"RegExp('Invalid! -> \u{1234568} <-')";
346        let err = ConstructorParser::new(
347            &source_text[7..35],
348            None,
349            Options {
350                pattern_span_offset: 7,
351                ..Options::default()
352            },
353        )
354        .parse();
355        assert!(err.is_err());
356        // println!("{:?}", err.unwrap_err().with_source_code(source_text));
357
358        let ret1 = LiteralParser::new(r"\d{4}-\d{2}-\d{2}", Some("vi"), Options::default())
359            .parse()
360            .unwrap();
361        let ret2 =
362            ConstructorParser::new(r"'\\d{4}-\\d{2}-\\d{2}'", Some("'vi'"), Options::default())
363                .parse()
364                .unwrap();
365        assert_eq!(ret1.to_string(), ret2.to_string());
366    }
367}