1mod flags_parser;
2mod parser_impl;
3mod pattern_parser;
4mod reader;
5mod span_factory;
6
7pub use parser_impl::{ConstructorParser, LiteralParser};
8
9#[cfg(test)]
10mod test {
11
12 use crate::{ConstructorParser, LiteralParser, Options};
13
/// Every `(pattern, flags)` pair in the table must be accepted by
/// `LiteralParser`; the first rejected pair aborts the test and reports the
/// offending literal together with the parser error.
#[test]
fn should_pass() {
    let cases = [
        ("", ""),
        ("a", ""),
        ("a+", ""),
        ("a*", ""),
        ("a?", ""),
        ("^$^$^$", ""),
        ("(?=a){1}", ""),
        ("(?!a){1}", ""),
        ("a{1}", ""),
        ("a{1", ""),
        ("a|{", ""),
        ("a{", ""),
        ("a{,", ""),
        ("a{1,", ""),
        ("a{1,}", ""),
        ("a{1,2}", ""),
        ("x{9007199254740991}", ""),
        ("x{9007199254740991,9007199254740991}", ""),
        ("a|b", ""),
        ("a|b|c", ""),
        ("a|b+?|c", ""),
        ("a+b*?c{1}d{2,}e{3,4}?", ""),
        (r"^(?=ab)\b(?!cd)(?<=ef)\B(?<!gh)$", ""),
        ("a.b..", ""),
        (r"\d\D\s\S\w\W", ""),
        (r"\x", ""),
        (
            r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{Basic_Emoji}",
            "",
        ),
        (
            r"\p{Emoji_Presentation}\P{Script_Extensions=Latin}\p{Sc}|\p{P}",
            "u",
        ),
        (r"^\p{General_Category=cntrl}+$", "u"),
        (r"\p{Basic_Emoji}", "v"),
        (r"\n\cM\0\x41\u1f60\.\/", ""),
        (r"\c0", ""),
        (r"\0", ""),
        (r"\0", "u"),
        (r"\u", ""),
        (r"\u{", ""),
        (r"\u{}", ""),
        (r"\u{0}", ""),
        (r"\u{1f600}", ""),
        (r"\u{1f600}", "u"),
        ("(?:abc)", ""),
        (r"(?<\u{1d49c}>.)\x1f", ""),
        ("a]", ""),
        ("a}", ""),
        ("]", ""),
        ("[]", ""),
        ("[a]", ""),
        ("[ab]", ""),
        ("[a-b]", ""),
        ("[-]", ""),
        ("[a-]", ""),
        ("[-a]", ""),
        ("[-a-]", ""),
        (r"[a\-b]", ""),
        (r"[-a-b]", ""),
        (r"[a-b-]", ""),
        (r"[a\-b-]", ""),
        (r"[\[\]\-]", ""),
        ("[a-z0-9]", ""),
        ("[a-a]", ""),
        (r"[\d-\D]", ""),
        (r"^([\ud801[\udc28-\udc4f])$", ""),
        (r"[a-c]]", ""),
        (
            r"[ϗϙϛϝϟϡϣϥϧϩϫϭϯ-ϳϵϸϻ-ϼа-џѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎ-ӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣա-ևᴀ-ᴫᵢ-ᵷᵹ-ᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕ-ẝẟạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹỻỽỿ-ἇἐ-ἕἠ-ἧἰ-ἷὀ-ὅὐ-ὗὠ-ὧὰ]",
            "",
        ),
        (r"[a-z0-9[.\\]]", "v"),
        (r"[a&&b&&c]", "v"),
        (r"[a--b--c]", "v"),
        (r"[[a-z]--b--c]", "v"),
        (r"[[[[[[[[[[[[[[[[[[[[[[[[a]]]]]]]]]]]]]]]]]]]]]]]]", "v"),
        (r"[\q{}\q{a}\q{bc}\q{d|e|f}\q{|||}]", "v"),
        (r"(?<foo>A)\k<foo>", ""),
        (r"(?<!a>)\k<a>", ""),
        (r"\k", ""),
        (r"\k<4>", ""),
        (r"\k<a>", ""),
        (r"(?<a>)\k<a>", ""),
        (r"(?<a>)\k<a>", "u"),
        (r"\1", ""),
        (r"\1()", ""),
        (r"\1()", "u"),
        (r"(?<n1>..)(?<n2>..)", ""),
        (r"(?<n1>..)|(?<n1>..)", ""),
        (r"(?<year>[0-9]{4})-[0-9]{2}|[0-9]{2}-(?<year>[0-9]{4})", ""),
        (r"(?:(?<a>x)|(?<a>y))\k<a>", ""),
        (r"(?<x>a)|(?<x>b)", ""),
        (r"(?:(?<x>a)|(?<y>a)(?<x>b))(?:(?<z>c)|(?<z>d))", ""),
        (r"(?:(?<x>a)|(?<x>b))\\k<x>", ""),
        (r"(?:(?:(?<x>a)|(?<x>b)|c)\\k<x>){2}", ""),
        (r"(?:(?:(?<x>a)|(?<x>b))\\k<x>){2}", ""),
        (r"(?:(?:(?<x>a)\\k<x>|(?<x>b)\\k<x>)|(?:))\\k<x>", ""),
        (r"(?:(?:(?<x>a\\k<x>)|(?<x>b\\k<x>))|(?:))\\k<x>", ""),
        (r"(?:.)", ""),
        (r"(?s:.)", ""),
        (r"(?ism:.)", ""),
        (r"(?-s:.)", ""),
        (r"(?-smi:.)", ""),
        (r"(?s-im:.)", ""),
        (r"(?si-m:.)", ""),
        (r"(?im-s:.)", "v"),
        (r"(?ims-:.)", ""),
    ];

    for (pattern_text, flags_text) in cases {
        let parsed =
            LiteralParser::new(pattern_text, Some(flags_text), Options::default()).parse();
        match parsed {
            Ok(_) => {}
            // Stop at the first rejected literal and show the parser's own error.
            Err(err) => panic!("Failed to parse /{pattern_text}/{flags_text}\n💥 {err}"),
        }
    }
}
137
/// Every `(pattern, flags)` pair in the table must be rejected by
/// `LiteralParser`; a pair that parses successfully fails the test.
#[test]
fn should_fail() {
    let cases = [
        ("a)", ""),
        (r"a\", ""),
        ("a]", "u"),
        ("a}", "u"),
        ("a|+", ""),
        ("a|{", "u"),
        ("a{", "u"),
        ("a{1", "u"),
        ("a{1,", "u"),
        ("a{,", "u"),
        ("x{9007199254740992}", ""),
        ("x{9007199254740991,9007199254740992}", ""),
        ("x{99999999999999999999999999999999999999999999999999}", ""),
        (r"\99999999999999999999999999999999999999999999999999", ""),
        (r"\u{FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF}", "u"),
        ("(?=a", ""),
        ("(?<!a", ""),
        (r"\c0", "u"),
        (r"\xa", "u"),
        (r"a\u", "u"),
        (r"\p{Emoji_Presentation", "u"),
        (r"\p{Script=", "u"),
        (r"\ka", "u"),
        (r"\k", "u"),
        (r"\k<", "u"),
        (r"\k<>", "u"),
        (r"\k<4>", "u"),
        (r"\k<a", "u"),
        (r"\1", "u"),
        (r"\k<a>", "u"),
        ("a(?:", ""),
        ("(", ""),
        (")", "v"),
        ("(a", ""),
        ("(?<a>", ""),
        ("(?<", ""),
        (r"(?<a\>.)", ""),
        (r"(?<a\>.)", "u"),
        (r"(?<\>.)", ""),
        (r"(?<\>.)", "u"),
        ("(?)", ""),
        ("(?=a){1}", "u"),
        ("(?!a){1}", "u"),
        (r"[\d-\D]", "u"),
        ("[", ""),
        ("[", "v"),
        ("[[", "v"),
        ("[[]", "v"),
        ("[z-a]", ""),
        (r"[a-c]]", "u"),
        (
            r"^([a-zªµºß-öø-ÿāăąćĉċčďđēĕėęěĝğġģĥħĩīĭįıijĵķ-ĸĺļľŀłńņň-ʼnŋōŏőœŕŗřśŝşšţťŧũūŭůűųŵŷźżž-ƀƃƅƈƌ-ƍƒƕƙ-ƛƞơƣƥƨƪ-ƫƭưƴƶƹ-ƺƽ-ƿdžljnjǎǐǒǔǖǘǚǜ-ǝǟǡǣǥǧǩǫǭǯ-ǰdzǵǹǻǽǿȁȃȅȇȉȋȍȏȑȓȕȗșțȝȟȡȣȥȧȩȫȭȯȱȳ-ȹȼȿ-ɀɂɇɉɋɍɏ-ʓʕ-ʯͱͳͷͻ-ͽΐά-ώϐ-ϑϕ-ϗϙϛϝϟϡϣϥϧϩϫϭϯ-ϳϵϸϻ-ϼа-џѡѣѥѧѩѫѭѯѱѳѵѷѹѻѽѿҁҋҍҏґғҕҗҙқҝҟҡңҥҧҩҫҭүұҳҵҷҹһҽҿӂӄӆӈӊӌӎ-ӏӑӓӕӗәӛӝӟӡӣӥӧөӫӭӯӱӳӵӷӹӻӽӿԁԃԅԇԉԋԍԏԑԓԕԗԙԛԝԟԡԣա-ևᴀ-ᴫᵢ-ᵷᵹ-ᶚḁḃḅḇḉḋḍḏḑḓḕḗḙḛḝḟḡḣḥḧḩḫḭḯḱḳḵḷḹḻḽḿṁṃṅṇṉṋṍṏṑṓṕṗṙṛṝṟṡṣṥṧṩṫṭṯṱṳṵṷṹṻṽṿẁẃẅẇẉẋẍẏẑẓẕ-ẝẟạảấầẩẫậắằẳẵặẹẻẽếềểễệỉịọỏốồổỗộớờởỡợụủứừửữựỳỵỷỹỻỽỿ-ἇἐ-ἕἠ-ἧἰ-ἷὀ-ὅὐ-ὗὠ-ὧὰ-ώᾀ-ᾇᾐ-ᾗᾠ-ᾧᾰ-ᾴᾶ-ᾷιῂ-ῄῆ-ῇῐ-ΐῖ-ῗῠ-ῧῲ-ῴῶ-ῷⁱⁿℊℎ-ℏℓℯℴℹℼ-ℽⅆ-ⅉⅎↄⰰ-ⱞⱡⱥ-ⱦⱨⱪⱬⱱⱳ-ⱴⱶ-ⱼⲁⲃⲅⲇⲉⲋⲍⲏⲑⲓⲕⲗⲙⲛⲝⲟⲡⲣⲥⲧⲩⲫⲭⲯⲱⲳⲵⲷⲹⲻⲽⲿⳁⳃⳅⳇⳉⳋⳍⳏⳑⳓⳕⳗⳙⳛⳝⳟⳡⳣ-ⳤⴀ-ⴥꙁꙃꙅꙇꙉꙋꙍꙏꙑꙓꙕꙗꙙꙛꙝꙟꙣꙥꙧꙩꙫꙭꚁꚃꚅꚇꚉꚋꚍꚏꚑꚓꚕꚗꜣꜥꜧꜩꜫꜭꜯ-ꜱꜳꜵꜷꜹꜻꜽꜿꝁꝃꝅꝇꝉꝋꝍꝏꝑꝓꝕꝗꝙꝛꝝꝟꝡꝣꝥꝧꝩꝫꝭꝯꝱ-ꝸꝺꝼꝿꞁꞃꞅꞇꞌff-stﬓ-ﬗa-z]|\ud801[\udc28-\udc4f]|\ud835[\udc1a-\udc33\udc4e-\udc54\udc56-\udc67\udc82-\udc9b\udcb6-\udcb9\udcbb\udcbd-\udcc3\udcc5-\udccf\udcea-\udd03\udd1e-\udd37\udd52-\udd6b\udd86-\udd9f\uddba-\uddd3\uddee-\ude07\ude22-\ude3b\ude56-\ude6f\ude8a-\udea5\udec2-\udeda\udedc-\udee1\udefc-\udf14\udf16-\udf1b\udf36-\udf4e\udf50-\udf55\udf70-\udf88\udf8a-\udf8f\udfaa-\udfc2\udfc4-\udfc9\udfcb])$",
            "",
        ),
        (r"[[\d-\D]]", "v"),
        (r"[a&&b--c]", "v"),
        (r"[a--b&&c]", "v"),
        (r"[\q{]", "v"),
        (r"[\q{\a}]", "v"),
        (r"(?<n>.)(?<n>.)", ""),
        (r"(?<n>.(?<n>..))", "u"),
        ("(?<n>)|(?<n>)(?<n>)", ""),
        ("(((((((?<n>.)))))))(?<n>)", ""),
        ("(?:(?<x>a)|(?<x>b))(?<x>c)", ""),
        ("(?<x>a)(?:(?<x>b)|(?<x>c))", ""),
        ("(?:(?:(?<x>a)|(?<x>b)))(?<x>c)", ""),
        ("(?:(?:(?<x>a)|(?<x>b))|(?:))(?<x>c)", ""),
        (r"(?a:.)", ""),
        (r"(?-S:.)", ""),
        (r"(?-:.)", ""),
        (r"(?iM:.)", ""),
        (r"(?imms:.)", ""),
        (r"(?-sI:.)", ""),
        (r"(?ii-s:.)", ""),
        (r"(?i-msm:.)", ""),
        (r"(?i", ""),
        (r"(?i-", ""),
        (r"(?i-s", ""),
    ];

    for (pattern_text, flags_text) in cases {
        let outcome =
            LiteralParser::new(pattern_text, Some(flags_text), Options::default()).parse();
        assert!(
            outcome.is_err(),
            "/{pattern_text}/{flags_text} should fail to parse, but passed!"
        );
    }
}
231
/// Table-driven check of parse outcomes for patterns listed alongside an
/// expected-error flag.
///
/// Each row is `(pattern, flags, expect_err)`: when `expect_err` is `true`
/// the parser must reject the literal, when it is `false` the parser must
/// accept it. The failure message names the direction that was violated, and
/// an unexpected rejection also prints the parser's error.
#[test]
fn should_fail_early_errors() {
    for &(pattern_text, flags_text, expect_err) in &[
        (r"(?<n>..)(?<n>..)", "", true),
        (r"a{2,1}", "", true),
        (r"(?<a>)\k<n>", "", true),
        (r"()\2", "u", true),
        (r"[a-\d]", "u", true),
        (r"[\d-z]", "u", true),
        (r"[\d-\d]", "u", true),
        (r"[z-a]", "", true),
        (r"\u{110000}", "u", true),
        (r"(?<\uD800\uDBFF>)", "", true),
        (r"\u{0}\u{110000}", "u", true),
        (r"(?<a\uD800\uDBFF>)", "", true),
        (r"\p{Foo=Bar}", "u", true),
        (r"\p{Foo}", "u", true),
        (r"\p{Basic_Emoji}", "u", true),
        (r"\P{Basic_Emoji}", "v", true),
        (r"[^\p{Basic_Emoji}]", "v", true),
        (r"[[^\p{Basic_Emoji}]]", "v", true),
        (r"[^\q{}]", "v", true),
        (r"[[^\q{}]]", "v", true),
        (r"[[^\q{ng}]]", "v", true),
        (r"[[^\q{a|}]]", "v", true),
        (r"[[^\q{ng}\q{o|k}]]", "v", true),
        (r"[[^\q{o|k}\q{ng}\q{o|k}]]", "v", true),
        (r"[[^\q{o|k}\q{o|k}\q{ng}]]", "v", true),
        (r"[[^\q{}&&\q{ng}]]", "v", true),
        (r"[[^\q{ng}&&\q{o|k}]]", "v", false),
        (r"[[^\q{ng}&&\q{o|k}&&\q{ng}]]", "v", false),
        (r"[[^\q{ng}--\q{o|k}]]", "v", true),
        (r"[[^\q{o|k}--\q{ng}]]", "v", false),
        (r"[[z-a]]", "v", true),
        (r"[[[[[^[[[[\q{ng}]]]]]]]]]", "v", true),
        (r"[^[[[[[[[[[[[[[[[[\q{ng}]]]]]]]]]]]]]]]]]", "v", true),
        ("(?:(?<x>a)|(?<x>b))(?<x>c)", "", true),
        ("(?:(?<x>a)|(?<x>b))(?<X>c)", "", false),
        ("(?<x>a)(?:(?<x>b)|(?<x>c))", "", true),
        ("(?<x>a)|(?:(?<x>b)|(?<x>c))", "", false),
        (r"(?ii:.)", "", true),
        (r"(?-ss:.)", "", true),
        (r"(?im-im:.)", "", true),
    ] {
        let outcome =
            LiteralParser::new(pattern_text, Some(flags_text), Options::default()).parse();
        match (outcome, expect_err) {
            // Expected a rejection but the literal was accepted.
            (Ok(_), true) => {
                panic!("/{pattern_text}/{flags_text} should fail with early error, but passed!")
            }
            // Expected acceptance but the literal was rejected; show why.
            (Err(err), false) => {
                panic!("/{pattern_text}/{flags_text} should pass, but failed!\n💥 {err}")
            }
            (Ok(_), false) | (Err(_), true) => {}
        }
    }
}
289
/// An empty literal pattern and an empty quoted constructor string (`''`)
/// must both parse, and in each result `body.body[0].body` must hold exactly
/// one element.
#[test]
fn should_handle_empty() {
    // Parse both inputs first so a parse failure surfaces before any length check.
    let from_literal = LiteralParser::new("", None, Options::default())
        .parse()
        .unwrap();
    let from_constructor = ConstructorParser::new("''", None, Options::default())
        .parse()
        .unwrap();

    let literal_len = from_literal.body.body[0].body.len();
    let constructor_len = from_constructor.body.body[0].body.len();

    assert_eq!(literal_len, 1);
    assert_eq!(constructor_len, 1);
}
302
/// Parses the same non-ASCII source under different flags and checks the
/// length of `body.body[0].body`: 15 with no flags, 14 with `u` or `v`.
// NOTE(review): the one-element difference presumably comes from how the emoji
// is counted outside unicode mode — confirm against the reader implementation.
#[test]
fn should_handle_unicode() {
    let source_text = "このEmoji🥹の数が変わる";
    let expectations = [(None, 15), (Some("u"), 14), (Some("v"), 14)];

    for (flags_text, expected) in expectations {
        let parsed = LiteralParser::new(source_text, flags_text, Options::default())
            .parse()
            .unwrap();
        let first = &parsed.body.body[0];
        assert_eq!(first.body.len(), expected);
    }
}
314
/// Parsing the same text with different span offsets must yield different
/// `span` values while the `to_string()` output stays identical.
#[test]
fn span_offset() {
    let pattern_text = "Adjust span but should have no side effect for parsing";

    let zero_offsets = Options {
        pattern_span_offset: 0,
        flags_span_offset: 0,
    };
    let shifted_offsets = Options {
        pattern_span_offset: 123,
        flags_span_offset: 456,
    };

    let baseline = LiteralParser::new(pattern_text, None, zero_offsets)
        .parse()
        .unwrap();
    let shifted = LiteralParser::new(pattern_text, None, shifted_offsets)
        .parse()
        .unwrap();

    // The offsets move the spans only; the rendered pattern is unaffected.
    assert_ne!(baseline.span, shifted.span);
    assert_eq!(baseline.to_string(), shifted.to_string());
}
342
/// Exercises `ConstructorParser` on quoted string-literal input.
///
/// First, the quoted argument sliced out of a `RegExp(...)` call (bytes
/// `7..35`, parsed with a pattern span offset of 7) must be rejected. Then a
/// regex literal and its string-literal equivalent, with backslashes doubled
/// and both pattern and flags quoted, must produce the same `to_string()`
/// output.
#[test]
fn string_literal() {
    let source_text = r"RegExp('Invalid! -> \u{1234568} <-')";
    // Bytes 7..35 cover the quoted argument, quotes included.
    let quoted_argument = &source_text[7..35];
    let offset_options = Options {
        pattern_span_offset: 7,
        ..Options::default()
    };
    let invalid = ConstructorParser::new(quoted_argument, None, offset_options).parse();
    assert!(invalid.is_err());

    let from_literal = LiteralParser::new(r"\d{4}-\d{2}-\d{2}", Some("vi"), Options::default())
        .parse()
        .unwrap();
    let from_constructor =
        ConstructorParser::new(r"'\\d{4}-\\d{2}-\\d{2}'", Some("'vi'"), Options::default())
            .parse()
            .unwrap();
    assert_eq!(from_literal.to_string(), from_constructor.to_string());
}
367}