// swc_ecma_regexp/parser/reader/string_literal_parser/mod.rs

1pub mod ast;
2mod characters;
3mod diagnostics;
4mod options;
5mod parser_impl;
6
7pub use options::Options;
8pub use parser_impl::{parse_regexp_literal, Parser};
9
#[cfg(test)]
mod test {
    use swc_common::Span;

    use super::{ast, parse_regexp_literal, Options, Parser};

    /// Test-only helper for mapping a [`Span`] back onto the text it covers.
    trait SpanExt {
        /// Returns the slice of `source_text` between this span's `lo` and
        /// `hi` positions, interpreted as byte offsets into `source_text`.
        fn source_text(self, source_text: &str) -> &str;
    }

    impl SpanExt for Span {
        fn source_text(self, source_text: &str) -> &str {
            // Slicing panics on out-of-range or non-boundary offsets, which is
            // the desired failure mode inside a test.
            &source_text[self.lo.0 as usize..self.hi.0 as usize]
        }
    }

    /// Every literal in this list must parse with the default options.
    #[test]
    fn should_pass() {
        for source_text in [
            r#""""#,
            r"''",
            r#""Hello, world!""#,
            r"'Hello, world!'",
            r#""He said, \"Hello!\"""#,
            r#"'She said, "Hello!"'"#,
            r"'It\'s a sunny day'",
            "'Line1\\nLine2'",
            "'Column1\tColumn2'",
            r#""Path to file: C:\\Program Files\\MyApp""#,
            r"'Backspace\bTest'",
            r"'FormFeed\fTest'",
            "'CarriageReturn\\rTest'",
            r"'VerticalTab\vTest'",
            r#""NullChar\0Test""#,
            r#""Hex A: \x41""#,
            r#""Unicode A: \u0041""#,
            // Literal U+1F600 (the escaped form is covered by the next entry).
            r#""Smiley: 😀""#,
            r#""Smiley: \u{1F600}""#,
            r#""Octal 7: \7""#,
            r#""Octal 77: \77""#,
            r#""Octal 123: \123""#,
            r#""Non-octal digits: \8\9""#,
            r#""This is a long string\
    that spans\
 multiple lines""#,
            r"'Line separator:\u2028Test'",
            r"'Paragraph separator:\u2029Test'",
            r#""NonEscapeCharacter: \c""#,
            r#""Zero followed by 8 and 9: \08\09""#,
            r#""NonZeroOctalDigit not followed by OctalDigit: \1x""#,
            r#""ZeroToThree OctalDigit not followed by OctalDigit: \33x""#,
            r#""FourToSeven OctalDigit: \47""#,
            r#""Unicode brace escape: \u{0041}""#,
            r#""Escaped backslash and quote: \\\"""#,
            r#""Invalid escape: \@""#,
            r#""He said, 'Hello!'""#,
            r#"'She replied, "Hi!"'"#,
            r#""Multiple escapes: \n\t\r""#,
            r#""Longest octal escape: \377""#,
            r#""Number at end: \1234""#,
            r#""Escape followed by letter: \1a""#,
            r#""This is a long string that spans\
 multiple lines using\
 multiple continuations""#,
            r#""Not an escape: \\g""#,
            r#""Surrogate pair: \uD83D\uDE00""#,
            r#""Line continuation with terminator: \
\r\n""#,
            r#""Special chars: !@#$%^&*()_+-=[]{}|;':,.<>/?\"''""#,
            r#""Combined escapes: \n\\\"\t\u0041\x42""#,
        ] {
            if let Err(err) = Parser::new(source_text, Options::default()).parse() {
                panic!("Expect to parse: {source_text} but failed: {err}");
            }
        }
    }

    /// Every input in this list must be rejected with the default options.
    #[test]
    fn should_fail() {
        for source_text in [
            r"Not quoted",
            r"'Unterminated",
            r#""Line terminator
without continuation""#,
            r#""Invalid hex escape: \xG1""#,
            r#""Invalid escapes: \x\y\z""#,
            r#""Invalid unicode escape: \u00G1""#,
            r#""Invalid unicode brace escape: \u{G1}""#,
            r#""Too many digits: \u{1234567}""#,
            r#""str"+'str'"#,
            r#"'str'+"str""#,
        ] {
            let result = Parser::new(source_text, Options::default()).parse();
            assert!(
                result.is_err(),
                "Expect to fail: {source_text} but passed..."
            );
        }
    }

    /// Legacy octal / non-octal-decimal escapes parse in non-strict mode but
    /// are reported as errors when `strict_mode` is enabled.
    #[test]
    fn should_fail_early_errors() {
        for source_text in [
            r#""invalid octal \777""#,
            r#""invalid non-octal decimal \9""#,
        ] {
            // These are allowed in non-strict mode.
            let result = Parser::new(
                source_text,
                Options {
                    strict_mode: false,
                    ..Options::default()
                },
            )
            .parse();
            assert!(
                result.is_ok(),
                "Expect to parse: {source_text} but failed..."
            );

            // But not in strict mode.
            let result = Parser::new(
                source_text,
                Options {
                    strict_mode: true,
                    ..Options::default()
                },
            )
            .parse();
            assert!(
                result.is_err(),
                "Expect to fail w/ early error: {source_text} but passed..."
            );
        }
    }

    /// The quote character used by the literal is recorded in `kind`.
    #[test]
    fn parse_quotes() {
        let options = Options::default();

        let ast = Parser::new(r#""double""#, options).parse().unwrap();
        assert_eq!(ast.kind, ast::StringLiteralKind::Double);

        let ast = Parser::new(r"'single'", options).parse().unwrap();
        assert_eq!(ast.kind, ast::StringLiteralKind::Single);
    }

    /// `combine_surrogate_pair` controls whether a code point outside the BMP
    /// is counted as two units or as one.
    #[test]
    fn should_combine_surrogate_pair() {
        // U+1F448 and U+1F3FB (two astral code points) followed by the six
        // characters `(=2+2)`.
        let source_text = "'👈🏻(=2+2)'";

        // Uncombined: 2 + 2 + 6 units.
        let ast = Parser::new(
            source_text,
            Options {
                combine_surrogate_pair: false,
                ..Options::default()
            },
        )
        .parse()
        .unwrap();
        assert_eq!(ast.body.len(), 10);

        // Combined: 1 + 1 + 6 units.
        let ast = Parser::new(
            source_text,
            Options {
                combine_surrogate_pair: true,
                ..Options::default()
            },
        )
        .parse()
        .unwrap();
        assert_eq!(ast.body.len(), 8);
    }

    /// A non-zero `span_offset` shifts every reported span without otherwise
    /// changing what is parsed.
    #[test]
    fn span_offset() {
        let source_text = "\"Adjust span but should have no side effect for parsing\"";
        let ret1 = Parser::new(
            source_text,
            Options {
                span_offset: 0,
                ..Options::default()
            },
        )
        .parse()
        .unwrap();
        let ret2 = Parser::new(
            source_text,
            Options {
                span_offset: 10,
                ..Options::default()
            },
        )
        .parse()
        .unwrap();

        assert_ne!(ret1.span, ret2.span);
        // Guard the pairwise comparison below: `zip` silently stops at the
        // shorter side, so a length mismatch would otherwise go unnoticed.
        assert_eq!(ret1.body.len(), ret2.body.len());
        for (a, b) in ret1.body.iter().zip(ret2.body.iter()) {
            assert_ne!(a.span, b.span);
        }
    }

    /// Spans of the literal and of each body unit slice back to the exact
    /// source text they were parsed from.
    #[test]
    fn restore_span() {
        let source_text = "'123'";
        let ast = Parser::new(
            source_text,
            Options {
                span_offset: 0,
                combine_surrogate_pair: false,
                strict_mode: false,
            },
        )
        .parse()
        .unwrap();

        assert_eq!(ast.span.source_text(source_text), source_text);

        // Mixed content: plain characters, escapes and a literal U+1F984.
        let source_text = "\"Hi,\\n🦄\\w\"";
        let ast = Parser::new(
            source_text,
            Options {
                span_offset: 0,
                combine_surrogate_pair: true,
                strict_mode: false,
            },
        )
        .parse()
        .unwrap();

        assert_eq!(ast.span.source_text(source_text), source_text);
        // Comparing the collected slices checks both content and unit count.
        let units: Vec<&str> = ast
            .body
            .iter()
            .map(|unit| unit.span.source_text(source_text))
            .collect();
        assert_eq!(units, ["H", "i", ",", r"\n", "🦄", r"\w"]);

        // Only a sub-slice is handed to the parser; `span_offset` makes the
        // resulting spans index into the full text again.
        let source_text = "...'<-HERE->'...";
        let ast = Parser::new(
            &source_text[3..13],
            Options {
                span_offset: 3,
                ..Options::default()
            },
        )
        .parse()
        .unwrap();

        assert_eq!(ast.span.source_text(source_text), "'<-HERE->'");
        let units: Vec<&str> = ast
            .body
            .iter()
            .map(|unit| unit.span.source_text(source_text))
            .collect();
        assert_eq!(units, ["<", "-", "H", "E", "R", "E", "-", ">"]);
    }

    /// The pattern given as a string argument to `new RegExp(...)` and the
    /// same pattern written as a regexp literal yield identical unit values.
    #[test]
    fn regexp_literal() {
        // Offsets are byte ranges: the quoted string in the first text and the
        // pattern between the slashes in the second.
        let source_text1 = r"re = new RegExp('^12🄳3\\d(?=4)\\\\$')";
        let offset1 = (16, 39);

        let source_text2 = r"re = /^12🄳3\d(?=4)\\$/";
        let offset2 = (6, 24);

        let combine_surrogate_pair = false;

        let ret1 = Parser::new(
            &source_text1[offset1.0..offset1.1],
            Options {
                span_offset: u32::try_from(offset1.0).unwrap(),
                combine_surrogate_pair,
                strict_mode: false,
            },
        )
        .parse()
        .unwrap()
        .body;
        let ret2 = parse_regexp_literal(
            &source_text2[offset2.0..offset2.1],
            u32::try_from(offset2.0).unwrap(),
            combine_surrogate_pair,
        );

        assert_eq!(ret1.len(), ret2.len());
        for (a, b) in ret1.iter().zip(ret2.iter()) {
            assert_eq!(a.value, b.value);
        }
    }
}