swc_ecma_regexp/parser/reader/
mod.rs

1mod reader_impl;
2mod string_literal_parser;
3
4pub use reader_impl::Reader;
5
#[cfg(test)]
mod test {
    // Unit tests for `Reader`: construction from raw text and from quoted string
    // literals, cursor movement (`peek`/`peek2`, `eat`/`eat2`/`eat3`, `advance`),
    // `checkpoint`/`rewind`, and slicing by offset through `atom`.
    //
    // NOTE(review): judging by the variable names used below, the second argument
    // of `Reader::initialize` selects unicode vs. legacy reading, and the third is
    // `true` exactly when the source is wrapped in quotes — confirm against
    // `reader_impl`.
    use crate::parser::reader::Reader;

    /// A string literal with an opening quote but no closing quote must make
    /// `Reader::initialize` return an error, for both quote styles and for both
    /// values of the second flag.
    #[test]
    fn should_fail() {
        for reader in [
            Reader::initialize(r#""Unterminated"#, true, true),
            Reader::initialize(r#""Unterminated"#, false, true),
            Reader::initialize("'Unterminated!", true, true),
            Reader::initialize("'Unterminated!", false, true),
        ] {
            assert!(reader.is_err());
        }
    }

    /// The text `RegExp!` must read identically whether it is given raw or
    /// wrapped in double or single quotes, and for both values of the second flag.
    #[test]
    fn should_pass_basic() {
        for mut reader in [
            Reader::initialize("RegExp!", true, false).unwrap(),
            Reader::initialize("RegExp!", false, false).unwrap(),
            Reader::initialize(r#""RegExp!""#, true, true).unwrap(),
            Reader::initialize(r#""RegExp!""#, false, true).unwrap(),
            Reader::initialize("'RegExp!'", true, true).unwrap(),
            Reader::initialize("'RegExp!'", false, true).unwrap(),
        ] {
            // Peeking looks ahead without consuming anything.
            assert_eq!(reader.peek(), Some('R' as u32));
            assert_eq!(reader.peek2(), Some('e' as u32));

            // `eat` consumes only on a match: the second 'R' is rejected.
            assert!(reader.eat('R'));
            assert!(!reader.eat('R'));
            assert!(reader.eat('e'));
            assert!(reader.eat('g'));
            assert!(reader.eat('E'));

            // A failed `eat3` must not consume, otherwise `eat2` could not match.
            assert!(!reader.eat3('E', 'x', 'p'));
            assert!(reader.eat2('x', 'p'));

            // Move away from the checkpoint, then make sure `rewind` restores it.
            let checkpoint = reader.checkpoint();
            assert_eq!(reader.peek(), Some('!' as u32));
            reader.advance();
            reader.advance();

            reader.rewind(checkpoint);
            assert_eq!(reader.peek(), Some('!' as u32));

            // After the last character nothing is left to peek.
            assert!(reader.eat('!'));
            assert_eq!(reader.peek(), None);
        }
    }

    /// Characters outside the BMP are a single unit for the unicode reader but
    /// two units (surrogate halves) for the legacy reader; BMP characters behave
    /// the same in both.
    #[test]
    fn should_pass_unicode() {
        let source_text = "𠮷野家は👈🏻あっち";

        let mut unicode_reader = Reader::initialize(source_text, true, false).unwrap();
        assert!(unicode_reader.eat('𠮷')); // Can eat
        assert!(unicode_reader.eat2('野', '家'));
        let checkpoint = unicode_reader.checkpoint();
        assert!(unicode_reader.eat('は'));
        unicode_reader.advance(); // Emoji
        unicode_reader.advance(); // Skin tone
        assert!(unicode_reader.eat('あ'));
        assert_eq!(unicode_reader.peek(), Some('っ' as u32));
        assert_eq!(unicode_reader.peek2(), Some('ち' as u32));
        unicode_reader.rewind(checkpoint);
        assert!(unicode_reader.eat('は'));

        let mut legacy_reader = Reader::initialize(source_text, false, false).unwrap();
        assert!(!legacy_reader.eat('𠮷')); // Can not eat
        legacy_reader.advance();
        assert!(!legacy_reader.eat('𠮷')); // Also can not
        legacy_reader.advance();
        assert!(legacy_reader.eat('野'));
        assert!(legacy_reader.eat('家'));
        // The checkpoint has to come from the reader that is rewound with it;
        // borrowing the other reader's position would only work by coincidence.
        let checkpoint = legacy_reader.checkpoint();
        assert!(legacy_reader.eat('は'));
        legacy_reader.advance(); // Emoji(High surrogate)
        legacy_reader.advance(); // Emoji(Low surrogate)
        legacy_reader.advance(); // Skin tone(High surrogate)
        legacy_reader.advance(); // Skin tone(Low surrogate)
        assert_eq!(legacy_reader.peek(), Some('あ' as u32));
        assert_eq!(legacy_reader.peek2(), Some('っ' as u32));
        assert!(legacy_reader.eat3('あ', 'っ', 'ち'));
        legacy_reader.rewind(checkpoint);
        assert!(legacy_reader.eat('は'));
    }

    /// Offsets taken immediately before and after consuming a token must slice
    /// back to exactly that token through `atom`, both for the raw source and for
    /// the same source wrapped in double quotes, even with emoji in between.
    #[test]
    fn span_position() {
        let source_text1 = r"^ Catch😎 @@ symbols🇺🇳 $";
        let reader1 = Reader::initialize(source_text1, true, false).unwrap();

        let source_text2 = format!("\"{source_text1}\"");
        let reader2 = Reader::initialize(&source_text2, true, true).unwrap();

        for mut reader in [reader1, reader2] {
            // Skip forward to `^`, consume it, and check the recorded span.
            while reader.peek() != Some('^' as u32) {
                reader.advance();
            }
            let s1 = reader.offset();
            assert!(reader.eat('^'));
            let e1 = reader.offset();
            assert_eq!(&reader.atom(s1, e1), "^");

            // Same for the two-character token `@@`.
            while reader.peek() != Some('@' as u32) {
                reader.advance();
            }
            let s2 = reader.offset();
            assert!(reader.eat('@'));
            assert!(reader.eat('@'));
            let e2 = reader.offset();
            assert_eq!(&reader.atom(s2, e2), "@@");

            // And for the trailing `$`, which follows the flag emoji.
            while reader.peek() != Some('$' as u32) {
                reader.advance();
            }
            let s3 = reader.offset();
            assert!(reader.eat('$'));
            let e3 = reader.offset();

            assert_eq!(&reader.atom(s3, e3), "$");
        }
    }

    /// A string literal spelled entirely with `\uXXXX` escapes must yield the
    /// same sequence of code points as the equivalent unescaped text.
    #[test]
    fn handle_escapes() {
        let mut reader1 = Reader::initialize("こんにちWorld2024", true, false).unwrap();
        let mut reader2 = Reader::initialize(
            r#""\u3053\u3093\u306B\u3061\u0057\u006F\u0072\u006C\u0064\u0032\u0030\u0032\u0034""#,
            false,
            true,
        )
        .unwrap();

        assert_eq!(reader1.eat('こ'), reader2.eat('こ'));
        assert_eq!(reader1.eat('ん'), reader2.eat('ん'));

        // Walk both readers in lockstep; they must agree on every remaining
        // code point and run out at the same time.
        loop {
            match (reader1.peek(), reader2.peek()) {
                (None, None) => {
                    break; // All passed
                }
                (Some(cp1), Some(cp2)) if cp1 == cp2 => {
                    reader1.advance();
                    reader2.advance();
                }
                _ => {
                    panic!("Mismatched characters");
                }
            }
        }
    }
}