// swc_ecma_regexp_ast/lib.rs

use bitflags::bitflags;
use is_macro::Is;
use swc_atoms::Atom;
use swc_common::{ast_node, EqIgnoreSpan, Span};

6mod display;
7
8/// The root of the `PatternParser` result.
9#[ast_node]
10#[derive(Eq, Hash, EqIgnoreSpan)]
11pub struct Pattern {
12    pub span: Span,
13    pub body: Disjunction,
14}
15
16/// Pile of [`Alternative`]s separated by `|`.
17#[ast_node]
18#[derive(Eq, Hash, EqIgnoreSpan)]
19pub struct Disjunction {
20    pub span: Span,
21    pub body: Vec<Alternative>,
22}
23
24/// Single unit of `|` separated alternatives.
25#[ast_node]
26#[derive(Eq, Hash, EqIgnoreSpan)]
27pub struct Alternative {
28    pub span: Span,
29    pub body: Vec<Term>,
30}
31
32/// Single unit of [`Alternative`], containing various kinds.
33#[ast_node(no_unknown)]
34#[derive(Eq, Hash, EqIgnoreSpan, Is)]
35pub enum Term {
36    // Assertion
37    // QuantifiableAssertion
38    #[tag("BoundaryAssertion")]
39    BoundaryAssertion(Box<BoundaryAssertion>),
40
41    #[tag("LookAroundAssertion")]
42    LookAroundAssertion(Box<LookAroundAssertion>),
43
44    #[tag("Quantifier")]
45    Quantifier(Box<Quantifier>),
46
47    // Atom
48    // ExtendedAtom
49    #[tag("Character")]
50    Character(Box<Character>),
51
52    #[tag("CharacterClassEscape")]
53    Dot(Dot),
54
55    #[tag("CharacterClassEscape")]
56    CharacterClassEscape(Box<CharacterClassEscape>),
57
58    #[tag("UnicodePropertyEscape")]
59    UnicodePropertyEscape(Box<UnicodePropertyEscape>),
60
61    #[tag("ClassStringDisjunction")]
62    CharacterClass(Box<CharacterClass>),
63
64    #[tag("ClassStringDisjunction")]
65    CapturingGroup(Box<CapturingGroup>),
66
67    #[tag("ClassStringDisjunction")]
68    IgnoreGroup(Box<IgnoreGroup>),
69
70    #[tag("ClassStringDisjunction")]
71    IndexedReference(Box<IndexedReference>),
72
73    #[tag("ClassStringDisjunction")]
74    NamedReference(Box<NamedReference>),
75}
76
77/// Simple form of assertion.
78/// e.g. `^`, `$`, `\b`, `\B`
79#[ast_node]
80#[derive(Eq, Hash, EqIgnoreSpan)]
81pub struct BoundaryAssertion {
82    pub span: Span,
83    #[use_eq]
84    pub kind: BoundaryAssertionKind,
85}
86
/// Kind of a [`BoundaryAssertion`] (stored in its `kind` field).
///
/// Discriminants are spelled out explicitly and start at `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum BoundaryAssertionKind {
    Start = 0,
    End = 1,
    Boundary = 2,
    NegativeBoundary = 3,
}

95/// Lookaround assertion.
96/// e.g. `(?=...)`, `(?!...)`, `(?<=...)`, `(?<!...)`
97#[ast_node]
98#[derive(Eq, Hash, EqIgnoreSpan)]
99pub struct LookAroundAssertion {
100    pub span: Span,
101    #[use_eq]
102    pub kind: LookAroundAssertionKind,
103    pub body: Disjunction,
104}
105
/// Kind of a [`LookAroundAssertion`] (stored in its `kind` field).
///
/// Discriminants are spelled out explicitly and start at `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LookAroundAssertionKind {
    Lookahead = 0,
    NegativeLookahead = 1,
    Lookbehind = 2,
    NegativeLookbehind = 3,
}

114/// Quantifier holding a [`Term`] and its repetition count.
115/// e.g. `a*`, `b+`, `c?`, `d{3}`, `e{4,}`, `f{5,6}`
116#[ast_node]
117#[derive(Eq, Hash, EqIgnoreSpan)]
118pub struct Quantifier {
119    pub span: Span,
120    pub min: u64,
121    /// `None` means no upper bound.
122    pub max: Option<u64>,
123    pub greedy: bool,
124    pub body: Term,
125}
126
127/// Single character.
128#[ast_node]
129#[derive(Eq, Hash, EqIgnoreSpan)]
130pub struct Character {
131    /// This will be invalid position when `UnicodeMode` is disabled and `value`
132    /// is a surrogate pair.
133    pub span: Span,
134    #[use_eq]
135    pub kind: CharacterKind,
136    /// Unicode code point or UTF-16 code unit.
137    pub value: u32,
138}
139
/// Kind of a [`Character`] (stored in its `kind` field).
///
/// Discriminants are spelled out explicitly and start at `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CharacterKind {
    ControlLetter = 0,
    HexadecimalEscape = 1,
    Identifier = 2,
    Null = 3,
    // To distinguish leading 0 cases like `\00` and `\000`
    Octal1 = 4,
    Octal2 = 5,
    Octal3 = 6,
    SingleEscape = 7,
    Symbol = 8,
    UnicodeEscape = 9,
}

155/// Character class.
156/// e.g. `\d`, `\D`, `\s`, `\S`, `\w`, `\W`
157#[ast_node]
158#[derive(Eq, Hash, EqIgnoreSpan)]
159pub struct CharacterClassEscape {
160    pub span: Span,
161    #[use_eq]
162    pub kind: CharacterClassEscapeKind,
163}
164
/// Kind of a [`CharacterClassEscape`] (stored in its `kind` field).
///
/// Discriminants are spelled out explicitly and start at `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CharacterClassEscapeKind {
    D = 0,
    NegativeD = 1,
    S = 2,
    NegativeS = 3,
    W = 4,
    NegativeW = 5,
}

175/// Unicode property.
176/// e.g. `\p{ASCII}`, `\P{ASCII}`, `\p{sc=Hiragana}`, `\P{sc=Hiragana}`
177#[ast_node]
178#[derive(Eq, Hash, EqIgnoreSpan)]
179pub struct UnicodePropertyEscape {
180    pub span: Span,
181    pub negative: bool,
182    /// `true` if `UnicodeSetsMode` and `name` matches unicode property of
183    /// strings.
184    pub strings: bool,
185    pub name: Atom,
186    pub value: Option<Atom>,
187}
188
189/// The `.`.
190#[ast_node]
191#[derive(Eq, Hash, EqIgnoreSpan)]
192pub struct Dot {
193    pub span: Span,
194}
195
196/// Character class wrapped by `[]`.
197/// e.g. `[a-z]`, `[^A-Z]`, `[abc]`, `[a&&b&&c]`, `[[a-z]--x--y]`
198#[ast_node]
199#[derive(Eq, Hash, EqIgnoreSpan)]
200pub struct CharacterClass {
201    pub span: Span,
202    pub negative: bool,
203    /// `true` if:
204    /// - `body` contains [`UnicodePropertyEscape`], nested [`CharacterClass`]
205    ///   or [`ClassStringDisjunction`] which `strings` is `true`
206    /// - and matches each logic depends on `kind`
207    pub strings: bool,
208    #[use_eq]
209    pub kind: CharacterClassContentsKind,
210    pub body: Vec<CharacterClassContents>,
211}
212
/// Kind of a [`CharacterClass`]'s contents (stored in its `kind` field).
///
/// Discriminants are spelled out explicitly and start at `0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum CharacterClassContentsKind {
    Union = 0,
    /// `UnicodeSetsMode` only.
    Intersection = 1,
    /// `UnicodeSetsMode` only.
    Subtraction = 2,
}

222#[ast_node(no_unknown)]
223#[derive(Eq, Hash, EqIgnoreSpan, Is)]
224pub enum CharacterClassContents {
225    #[tag("CharacterClassRange")]
226    CharacterClassRange(Box<CharacterClassRange>),
227
228    #[tag("CharacterClassEscape")]
229    CharacterClassEscape(Box<CharacterClassEscape>),
230
231    #[tag("UnicodePropertyEscape")]
232    UnicodePropertyEscape(Box<UnicodePropertyEscape>),
233
234    #[tag("Character")]
235    Character(Box<Character>),
236
237    /// `UnicodeSetsMode` only
238    #[tag("NestedCharacterClass")]
239    NestedCharacterClass(Box<CharacterClass>),
240
241    /// `UnicodeSetsMode` only
242    #[tag("ClassStringDisjunction")]
243    ClassStringDisjunction(Box<ClassStringDisjunction>),
244}
245
246/// `-` separated range of characters.
247/// e.g. `a-z`, `A-Z`, `0-9`
248#[ast_node]
249#[derive(Eq, Hash, EqIgnoreSpan)]
250pub struct CharacterClassRange {
251    pub span: Span,
252    pub min: Character,
253    pub max: Character,
254}
255
256/// `|` separated string of characters wrapped by `\q{}`.
257#[ast_node]
258#[derive(Eq, Hash, EqIgnoreSpan)]
259pub struct ClassStringDisjunction {
260    pub span: Span,
261    /// `true` if body is empty or contains [`ClassString`] which `strings` is
262    /// `true`.
263    pub strings: bool,
264    pub body: Vec<ClassString>,
265}
266
267/// Single unit of [`ClassStringDisjunction`].
268#[ast_node]
269#[derive(Eq, Hash, EqIgnoreSpan)]
270pub struct ClassString {
271    pub span: Span,
272    /// `true` if body is empty or contain 2 more characters.
273    pub strings: bool,
274    pub body: Vec<Character>,
275}
276
277/// Named or unnamed capturing group.
278/// e.g. `(...)`, `(?<name>...)`
279#[ast_node]
280#[derive(Eq, Hash, EqIgnoreSpan)]
281pub struct CapturingGroup {
282    pub span: Span,
283    /// Group name to be referenced by [`NamedReference`].
284    pub name: Option<Atom>,
285    pub body: Disjunction,
286}
287
288/// Pseudo-group for ignoring.
289/// e.g. `(?:...)`
290#[ast_node]
291#[derive(Eq, Hash, EqIgnoreSpan)]
292pub struct IgnoreGroup {
293    pub span: Span,
294    pub modifiers: Option<Modifiers>,
295    pub body: Disjunction,
296}
297
298/// Modifiers in [`IgnoreGroup`].
299/// e.g. `i` in `(?i:...)`, `-s` in `(?-s:...)`
300#[ast_node]
301#[derive(Eq, Hash, EqIgnoreSpan)]
302pub struct Modifiers {
303    pub span: Span,
304    #[use_eq]
305    pub enabling: Modifier,
306    #[use_eq]
307    pub disabling: Modifier,
308}
309
310bitflags! {
311    /// Each part of modifier in [`Modifiers`].
312    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
313    pub struct Modifier: u8 {
314        /// Ignore case flag
315        const I = 1 << 0;
316        /// Multiline flag
317        const M = 1 << 1;
318        /// DotAll flag
319        const S = 1 << 2;
320    }
321}
322
323/// Backreference by index.
324/// e.g. `\1`, `\2`, `\3`
325#[ast_node]
326#[derive(Eq, Hash, EqIgnoreSpan)]
327pub struct IndexedReference {
328    pub span: Span,
329    pub index: u32,
330}
331
332/// Backreference by name.
333/// e.g. `\k<name>`
334#[ast_node]
335#[derive(Eq, Hash, EqIgnoreSpan)]
336pub struct NamedReference {
337    pub span: Span,
338    pub name: Atom,
339}
340
/// Pins the in-memory size of the node enums on 64-bit targets, so an
/// accidental size change fails the test suite.
#[cfg(target_pointer_width = "64")]
#[test]
fn size_asserts() {
    use std::mem::size_of;

    // `assert_eq!` reports the actual size on failure, unlike `assert!(a == b)`.
    assert_eq!(size_of::<Term>(), 16);
    assert_eq!(size_of::<CharacterClassContents>(), 16);
}