swc_ecma_regexp/
lib.rs

1pub mod ast;
2mod diagnostics;
3mod options;
4mod parser;
5mod surrogate_pair;
6
7pub use crate::{
8    options::Options,
9    parser::{ConstructorParser, LiteralParser},
10};
11
/// Round-trip tests for the `Display` output of parsed regular expression literals.
#[cfg(test)]
mod test {
    use crate::{LiteralParser, Options};

    /// One table entry: `(input literal, expected display)`.
    ///
    /// `None` in the second slot means the displayed form must equal the input.
    type Case<'a> = (&'a str, Option<&'a str>);

    /// Regular expression literals (including the surrounding slashes and flags)
    /// paired with the text they are expected to format back to.
    static CASES: &[Case] = &[
        ("/ab/", None),
        ("/ab/u", None),
        ("/abc/i", None),
        ("/abc/iu", None),
        ("/a*?/i", None),
        ("/a*?/iu", None),
        ("/emo👈🏻ji/", None),
        ("/emo👈🏻ji/u", None),
        ("/ab|c/i", None),
        ("/ab|c/iu", None),
        ("/a|b+|c/i", None),
        ("/a|b+|c/iu", None),
        ("/(?=a)|(?<=b)|(?!c)|(?<!d)/i", None),
        ("/(?=a)|(?<=b)|(?!c)|(?<!d)/iu", None),
        (r"/(cg)(?<n>cg)(?:g)/", None),
        (r"/(cg)(?<n>cg)(?:g)/u", None),
        (r"/^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$/", None),
        (r"/^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$/u", None),
        (r"/^(?<!ab)$/", None),
        (r"/^(?<!ab)$/u", None),
        (r"/[abc]/", None),
        (r"/[abc]/u", None),
        (r"/[a&&b]/v", None),
        (r"/[a--b]/v", None),
        (r"/[^a--b--c]/v", None),
        (r"/[a[b[c[d[e[f[g[h[i[j[k[l]]]]]]]]]]]]/v", None),
        (r"/[\q{abc|d|e|}]/v", None),
        (r"/\p{Basic_Emoji}/v", None),
        (r"/[|\]]/", None),
        (r"/[|\]]/u", None),
        (r"/c\]/", None),
        (r"/c\]/u", None),
        ("/a{0}|b{1,2}|c{3,}/i", None),
        ("/a{0}|b{1,2}|c{3,}/iu", None),
        (r"/Em🥹j/", None),
        (r"/Em🥹j/u", None),
        (r"/\n\cM\0\x41\./", None),
        (r"/\n\cM\0\x41\./u", None),
        (r"/\n\cM\0\x41\u1234\./", None),
        (r"/\n\cM\0\x41\u1234\./u", None),
        (r"/[\bb]/", None),
        (r"/[\bb]/u", None),
        (r"/\d+/g", None),
        (r"/\d+/gu", None),
        (r"/\D/g", None),
        (r"/\D/gu", None),
        (r"/\w/g", None),
        (r"/\w/gu", None),
        (r"/\w+/g", None),
        (r"/\w+/gu", None),
        (r"/\s/g", None),
        (r"/\s/gu", None),
        (r"/\s+/g", None),
        (r"/\s+/gu", None),
        (r"/\t\n\v\f\r/", None),
        (r"/\t\n\v\f\r/u", None),
        (r"/\p{L}/u", None),
        (r"/\d/g", None),
        ("/abcd/igv", Some("/abcd/igv")),
        (r"/\d/ug", Some(r"/\d/ug")),
        (r"/\cY/", None),
        // Hex digits inside unicode escapes are upper-cased on display.
        (
            r"/\n\cM\0\x41\u{1f600}\./u",
            Some(r"/\n\cM\0\x41\u{1F600}\./u"),
        ),
        (r"/\u02c1/u", Some(r"/\u02C1/u")),
        (r"/c]/", None),
        // Octal tests from: <https://github.com/tc39/test262/blob/d62fa93c8f9ce5e687c0bbaa5d2b59670ab2ff60/test/annexB/language/literals/regexp/legacy-octal-escape.js>
        (r"/\1/", None),
        (r"/\2/", None),
        (r"/\3/", None),
        (r"/\4/", None),
        (r"/\5/", None),
        (r"/\6/", None),
        (r"/\7/", None),
        (r"/\00/", None),
        (r"/\07/", None),
        (r"/\30/", None),
        (r"/\37/", None),
        (r"/\40/", None),
        (r"/\47/", None),
        (r"/\70/", None),
        (r"/\77/", None),
        (r"/\000/", None),
        (r"/\007/", None),
        (r"/\070/", None),
        (r"/\300/", None),
        (r"/\307/", None),
        (r"/\370/", None),
        (r"/\377/", None),
        (r"/\0111/", None),
        (r"/\0022/", None),
        (r"/\0003/", None),
        (r"/(.)\1/", None),
        // Identity escape from: <https://github.com/tc39/test262/blob/d62fa93c8f9ce5e687c0bbaa5d2b59670ab2ff60/test/annexB/language/literals/regexp/identity-escape.js>
        (r"/\C/", None),
        (r"/O\PQ/", None),
        (r"/\8/", None),
        (r"/7\89/", None),
        (r"/\9/", None),
        (r"/8\90/", None),
        (r"/(.)(.)(.)(.)(.)(.)(.)(.)\8\8/", None),
        // Class escape from: <https://github.com/tc39/test262/blob/d62fa93c8f9ce5e687c0bbaa5d2b59670ab2ff60/test/annexB/language/literals/regexp/class-escape.js>
        (r"/\c0/", None),
        (r"/[\c0]/", None),
        (r"/\c1/", None),
        (r"/[\c10]+/", None),
        (r"/\c8/", None),
        (r"/[\c8]/", None),
        (r"/[\c80]+/", None),
        (r"/\c_/", None),
        // Upper-cased hex digits in `\u` escapes, with and without the `u` flag --
        (r"/^|\udf06/gu", Some(r"/^|\uDF06/gu")),
        (r"/\udf06/", Some(r"/\uDF06/")),
        (r"/\udf06/u", Some(r"/\uDF06/u")),
        (r"/^|\udf06/g", Some(r"/^|\uDF06/g")),
        // --
        (r"/[\-]/", None),
        (r"/[\-]/u", None),
        (r"/[\-]/v", None),
        (r"/([\-a-z]{0,31})/iu", None),
        // ES2025 ---
        (r"/(?i:.)/", None),
        (r"/(?-s:.)/", None),
        (r"/(?im-s:.)/u", None),
        (r"/(?m-is:.)/v", None),
        (r"/(?smi:.)/v", Some(r"/(?ims:.)/v")),
    ];

    /// Parses every literal in `CASES` and checks that formatting the parsed
    /// pattern reproduces the expected text.
    ///
    /// Every panic names the offending input so a failing table entry can be
    /// located without bisecting the list.
    #[test]
    fn test_display() {
        for &(input, output) in CASES {
            // The pattern body sits between the first and the last `/`;
            // whatever follows the last `/` is the flag string.
            let (_, rest) = input
                .split_once('/')
                .unwrap_or_else(|| panic!("case {input:?} has no opening `/`"));
            let (pattern, flags) = rest
                .rsplit_once('/')
                .unwrap_or_else(|| panic!("case {input:?} has no closing `/`"));

            let actual = LiteralParser::new(pattern, Some(flags), Options::default())
                .parse()
                .unwrap_or_else(|err| panic!("failed to parse {input:?}: {err:?}"));

            let expect = output.unwrap_or(input);
            assert_eq!(
                expect,
                format!("/{actual}/{flags}"), // This uses `Display` impls
                "display mismatch for {input:?}"
            );
        }
    }
}