swc_ecma_regexp/parser/reader/string_literal_parser/
mod.rs1pub mod ast;
2mod characters;
3mod diagnostics;
4mod options;
5mod parser_impl;
6
7pub use options::Options;
8pub use parser_impl::{parse_regexp_literal, Parser};
9
10#[cfg(test)]
11mod test {
12 use swc_common::Span;
13
14 use super::{ast, parse_regexp_literal, Options, Parser};
15
16 trait SpanExt {
17 fn source_text(self, source_text: &str) -> &str;
18 }
19
20 impl SpanExt for Span {
21 fn source_text(self, source_text: &str) -> &str {
22 &source_text[self.lo.0 as usize..self.hi.0 as usize]
23 }
24 }
25
/// Every literal listed here must be accepted by the parser when it is
/// configured with `Options::default()`.
///
/// Most cases are raw strings, so backslash sequences reach the parser
/// verbatim and are interpreted by *it*, not by the Rust compiler. The few
/// non-raw cases (`"'Column1\tColumn2'"`, ...) deliberately let Rust expand
/// the escape so that the parser sees the resulting character itself.
#[test]
fn should_pass() {
    for source_text in [
        r#""""#,
        r"''",
        r#""Hello, world!""#,
        r"'Hello, world!'",
        r#""He said, \"Hello!\"""#,
        r#"'She said, "Hello!"'"#,
        r"'It\'s a sunny day'",
        "'Line1\\nLine2'",
        "'Column1\tColumn2'",
        r#""Path to file: C:\\Program Files\\MyApp""#,
        r"'Backspace\bTest'",
        r"'FormFeed\fTest'",
        "'CarriageReturn\\rTest'",
        r"'VerticalTab\vTest'",
        r#""NullChar\0Test""#,
        r#""Hex A: \x41""#,
        r#""Unicode A: \u0041""#,
        // A literal (unescaped) character outside the BMP: U+1F600.
        // The next case covers the same code point written as an escape.
        r#""Smiley: 😀""#,
        r#""Smiley: \u{1F600}""#,
        r#""Octal 7: \7""#,
        r#""Octal 77: \77""#,
        r#""Octal 123: \123""#,
        r#""Non-octal digits: \8\9""#,
        // Line continuations: each backslash is immediately followed by a
        // real newline. Leading whitespace on the following lines is simply
        // part of the literal.
        r#""This is a long string\
        that spans\
        multiple lines""#,
        r"'Line separator:\u2028Test'",
        r"'Paragraph separator:\u2029Test'",
        r#""NonEscapeCharacter: \c""#,
        r#""Zero followed by 8 and 9: \08\09""#,
        r#""NonZeroOctalDigit not followed by OctalDigit: \1x""#,
        r#""ZeroToThree OctalDigit not followed by OctalDigit: \33x""#,
        r#""FourToSeven OctalDigit: \47""#,
        r#""Unicode brace escape: \u{0041}""#,
        r#""Escaped backslash and quote: \\\"""#,
        r#""Invalid escape: \@""#,
        r#""He said, 'Hello!'""#,
        r#"'She replied, "Hi!"'"#,
        r#""Multiple escapes: \n\t\r""#,
        r#""Longest octal escape: \377""#,
        r#""Number at end: \1234""#,
        r#""Escape followed by letter: \1a""#,
        r#""This is a long string that spans\
        multiple lines using\
        multiple continuations""#,
        r#""Not an escape: \\g""#,
        r#""Surrogate pair: \uD83D\uDE00""#,
        // The continuation must be followed directly by `\r\n` (as escape
        // sequences), so the second line starts at column 0 on purpose.
        r#""Line continuation with terminator: \
\r\n""#,
        r#""Special chars: !@#$%^&*()_+-=[]{}|;':,.<>/?\"''""#,
        r#""Combined escapes: \n\\\"\t\u0041\x42""#,
    ] {
        if let Err(err) = Parser::new(source_text, Options::default()).parse() {
            panic!("Expect to parse: {source_text} but failed: {err}");
        }
    }
}
86
/// Each input below must be rejected when parsed with `Options::default()`.
#[test]
fn should_fail() {
    let rejected = [
        r"Not quoted",
        r"'Unterminated",
        // A real newline inside the literal with no preceding backslash.
        r#""Line terminator
without continuation""#,
        r#""Invalid hex escape: \xG1""#,
        r#""Invalid escapes: \x\y\z""#,
        r#""Invalid unicode escape: \u00G1""#,
        r#""Invalid unicode brace escape: \u{G1}""#,
        r#""Too many digits: \u{1234567}""#,
        // Anything after the closing quote is not part of a single literal.
        r#""str"+'str'"#,
        r#"'str'+"str""#,
    ];

    for source_text in rejected {
        let outcome = Parser::new(source_text, Options::default()).parse();
        assert!(
            outcome.is_err(),
            "Expect to fail: {source_text} but passed..."
        );
    }
}
111
/// Inputs that parse with `strict_mode: false` but must be reported as errors
/// once `strict_mode: true` is set.
#[test]
fn should_fail_early_errors() {
    let cases = [
        r#""invalid octal \777""#,
        r#""invalid non-octal decimal \9""#,
    ];

    for source_text in cases {
        // Same input, same remaining options; only the strictness flag varies.
        let parse_with = |strict_mode: bool| {
            Parser::new(
                source_text,
                Options {
                    strict_mode,
                    ..Options::default()
                },
            )
            .parse()
        };

        let sloppy = parse_with(false);
        assert!(
            sloppy.is_ok(),
            "Expect to parse: {source_text} but failed..."
        );

        let strict = parse_with(true);
        assert!(
            strict.is_err(),
            "Expect to fail w/ early error: {source_text} but passed..."
        );
    }
}
149
/// The quote character that delimits the literal determines the reported
/// `StringLiteralKind`.
#[test]
fn parse_quotes() {
    let options = Options::default();

    let expectations = [
        (r#""double""#, ast::StringLiteralKind::Double),
        (r"'single'", ast::StringLiteralKind::Single),
    ];

    for (source_text, expected_kind) in expectations {
        let literal = Parser::new(source_text, options).parse().unwrap();
        assert_eq!(literal.kind, expected_kind);
    }
}
160
/// `combine_surrogate_pair` decides whether a character outside the BMP is
/// reported as one unit or as two surrogate units.
#[test]
fn should_combine_surrogate_pair() {
    // Two code points outside the BMP followed by six ASCII characters.
    // The code points are spelled as Rust escapes so the fixture cannot be
    // silently degraded by a lossy re-encoding of this file; which astral
    // characters are used is irrelevant to the test, only that there are two.
    let source_text = "'\u{1F34F}\u{1F47B}(=2+2)'";

    let unit_count = |combine_surrogate_pair: bool| {
        Parser::new(
            source_text,
            Options {
                combine_surrogate_pair,
                ..Options::default()
            },
        )
        .parse()
        .unwrap()
        .body
        .len()
    };

    // Not combined: 2 astral characters * 2 surrogates + 6 ASCII = 10 units.
    assert_eq!(unit_count(false), 10);
    // Combined: 2 astral characters + 6 ASCII = 8 units.
    assert_eq!(unit_count(true), 8);
}
187
/// Parsing the same text with different `span_offset` values must succeed both
/// times and yield different spans for the literal and for every body unit.
#[test]
fn span_offset() {
    let source_text = "\"Adjust span but should have no side effect for parsing\"";

    let parse_at = |span_offset: u32| {
        Parser::new(
            source_text,
            Options {
                span_offset,
                ..Options::default()
            },
        )
        .parse()
        .unwrap()
    };

    let unshifted = parse_at(0);
    let shifted = parse_at(10);

    assert_ne!(unshifted.span, shifted.span);
    for (left, right) in unshifted.body.iter().zip(shifted.body.iter()) {
        assert_ne!(left.span, right.span);
    }
}
215
/// Spans reported by the parser must slice the original text back out:
/// the literal's span yields the whole literal, and each body unit's span
/// yields exactly the source characters that produced that unit.
#[test]
fn restore_span() {
    // Plain ASCII literal: the top-level span covers the entire input.
    let source_text = "'123'";
    let ast = Parser::new(
        source_text,
        Options {
            span_offset: 0,
            combine_surrogate_pair: false,
            strict_mode: false,
        },
    )
    .parse()
    .unwrap();

    assert_eq!(ast.span.source_text(source_text), source_text);

    // Escapes and one character outside the BMP (U+1F984, written as a Rust
    // escape so the fixture survives lossy re-encoding of this file). With
    // `combine_surrogate_pair: true` that character is a single unit.
    let source_text = "\"Hi,\\n\u{1F984}\\w\"";
    let ast = Parser::new(
        source_text,
        Options {
            span_offset: 0,
            combine_surrogate_pair: true,
            strict_mode: false,
        },
    )
    .parse()
    .unwrap();

    assert_eq!(ast.span.source_text(source_text), source_text);
    let mut units = ast.body.iter();
    for expected in ["H", "i", ",", r"\n", "\u{1F984}", r"\w"] {
        assert_eq!(units.next().unwrap().span.source_text(source_text), expected);
    }
    assert!(units.next().is_none());

    // The literal is embedded in a larger text: only the literal is handed to
    // the parser, and `span_offset` makes the spans index into the full text.
    let source_text = "...'<-HERE->'...";
    let ast = Parser::new(
        &source_text[3..13],
        Options {
            span_offset: 3,
            ..Options::default()
        },
    )
    .parse()
    .unwrap();

    assert_eq!(ast.span.source_text(source_text), "'<-HERE->'");
    let mut units = ast.body.iter();
    for expected in ["<", "-", "H", "E", "R", "E", "-", ">"] {
        assert_eq!(units.next().unwrap().span.source_text(source_text), expected);
    }
    assert!(units.next().is_none());
}
277
/// A pattern written as a string argument to `new RegExp(...)` and the same
/// pattern written as a regexp literal must produce the same unit values.
#[test]
fn regexp_literal() {
    // The offsets below are BYTE ranges into the raw strings. The non-BMP
    // character U+1F973 occupies 4 bytes in UTF-8; the ranges are only
    // correct when that exact character is present in both sources.
    //
    // `'...'` string literal: starts right after `re = new RegExp(` (16 bytes)
    // and is 23 bytes long including both quotes.
    let source_text1 = r"re = new RegExp('^12🥳3\\d(?=4)\\\\$')";
    let offset1 = (16, 39);

    // Regexp body between the slashes: starts after `re = /` (6 bytes) and is
    // 18 bytes long; the delimiting slashes are excluded.
    let source_text2 = r"re = /^12🥳3\d(?=4)\\$/";
    let offset2 = (6, 24);

    let combine_surrogate_pair = false;

    let ret1 = Parser::new(
        &source_text1[offset1.0..offset1.1],
        Options {
            span_offset: u32::try_from(offset1.0).unwrap(),
            combine_surrogate_pair,
            strict_mode: false,
        },
    )
    .parse()
    .unwrap()
    .body;
    let ret2 = parse_regexp_literal(
        &source_text2[offset2.0..offset2.1],
        u32::try_from(offset2.0).unwrap(),
        combine_surrogate_pair,
    );

    // Same number of units, and pairwise identical values.
    assert_eq!(ret1.len(), ret2.len());
    for (a, b) in ret1.iter().zip(ret2.iter()) {
        assert_eq!(a.value, b.value);
    }
}
310}