swc_html_parser/parser/
open_elements_stack.rs

1use swc_html_ast::*;
2
3use crate::parser::{
4    is_html_integration_point, is_mathml_text_integration_point, is_same_node, Data, RcNode,
5};
6
// Tag names that `generate_implied_end_tags` and
// `generate_implied_end_tags_with_exclusion` pop while one of them is the
// current node. Both callers additionally require the HTML namespace.
static IMPLICIT_END_TAG_REQUIRED: &[&str] = &[
    "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc",
];
10
// Tag names popped by `generate_implied_end_tags_thoroughly`: every entry of
// `IMPLICIT_END_TAG_REQUIRED` plus the table-structure elements (`caption`,
// `colgroup`, `tbody`, `td`, `tfoot`, `th`, `thead`, `tr`).
static IMPLICIT_END_TAG_REQUIRED_THOROUGHLY: &[&str] = &[
    "caption", "colgroup", "dd", "dt", "li", "optgroup", "option", "p", "rb", "rp", "rt", "rtc",
    "tbody", "td", "tfoot", "th", "thead", "tr",
];
15
// (tag name, namespace) pairs that act as scope boundaries for `has_in_scope`
// and `has_node_in_scope`: the search from the current node towards the root
// stops without a match when it meets one of these before finding its target.
static SPECIFIC_SCOPE: &[(&str, Namespace)] = &[
    ("applet", Namespace::HTML),
    ("caption", Namespace::HTML),
    ("html", Namespace::HTML),
    ("marquee", Namespace::HTML),
    ("object", Namespace::HTML),
    ("table", Namespace::HTML),
    ("td", Namespace::HTML),
    ("template", Namespace::HTML),
    ("th", Namespace::HTML),
    ("annotation-xml", Namespace::MATHML),
    ("mi", Namespace::MATHML),
    ("mn", Namespace::MATHML),
    ("mo", Namespace::MATHML),
    ("ms", Namespace::MATHML),
    ("mtext", Namespace::MATHML),
    ("desc", Namespace::SVG),
    ("foreignObject", Namespace::SVG),
    ("title", Namespace::SVG),
];
36
// Scope boundaries for `has_in_list_item_scope`: the same entries as
// `SPECIFIC_SCOPE`, followed by HTML `ol` and `ul`.
static LIST_ITEM_SCOPE: &[(&str, Namespace)] = &[
    ("applet", Namespace::HTML),
    ("caption", Namespace::HTML),
    ("html", Namespace::HTML),
    ("marquee", Namespace::HTML),
    ("object", Namespace::HTML),
    ("table", Namespace::HTML),
    ("td", Namespace::HTML),
    ("template", Namespace::HTML),
    ("th", Namespace::HTML),
    ("annotation-xml", Namespace::MATHML),
    ("mi", Namespace::MATHML),
    ("mn", Namespace::MATHML),
    ("mo", Namespace::MATHML),
    ("ms", Namespace::MATHML),
    ("mtext", Namespace::MATHML),
    ("desc", Namespace::SVG),
    ("foreignObject", Namespace::SVG),
    ("title", Namespace::SVG),
    // Additions specific to list item scope.
    ("ol", Namespace::HTML),
    ("ul", Namespace::HTML),
];
59
// Scope boundaries for `has_in_button_scope`: the same entries as
// `SPECIFIC_SCOPE`, followed by HTML `button`.
static BUTTON_SCOPE: &[(&str, Namespace)] = &[
    ("applet", Namespace::HTML),
    ("caption", Namespace::HTML),
    ("html", Namespace::HTML),
    ("marquee", Namespace::HTML),
    ("object", Namespace::HTML),
    ("table", Namespace::HTML),
    ("td", Namespace::HTML),
    ("template", Namespace::HTML),
    ("th", Namespace::HTML),
    ("annotation-xml", Namespace::MATHML),
    ("mi", Namespace::MATHML),
    ("mn", Namespace::MATHML),
    ("mo", Namespace::MATHML),
    ("ms", Namespace::MATHML),
    ("mtext", Namespace::MATHML),
    ("desc", Namespace::SVG),
    ("foreignObject", Namespace::SVG),
    ("title", Namespace::SVG),
    // Addition specific to button scope.
    ("button", Namespace::HTML),
];
81
// Scope boundaries for `has_in_table_scope`; only these three HTML elements
// stop the search.
static TABLE_SCOPE: &[(&str, Namespace)] = &[
    ("html", Namespace::HTML),
    ("table", Namespace::HTML),
    ("template", Namespace::HTML),
];
87
// The element types named as exceptions in the select scope definition (see
// the comment on `has_in_select_scope`): select scope consists of all element
// types except these two, so this list is an exclusion list rather than a
// boundary list like the other `*_SCOPE` tables.
static SELECT_SCOPE: &[(&str, Namespace)] =
    &[("optgroup", Namespace::HTML), ("option", Namespace::HTML)];
90
// Stack of open elements, together with a running count of the HTML
// `template` elements currently on it.
pub struct OpenElementsStack {
    // Open elements; `push` appends, so the last entry is the current node.
    // NOTE: this field is public, and writes that bypass the methods of this
    // type do not update `template_element_count`.
    pub items: Vec<RcNode>,
    // Number of HTML `template` elements in `items`, maintained by `push`,
    // `pop`, `insert`, `replace` and `remove`.
    template_element_count: usize,
}
95
96impl OpenElementsStack {
97    pub fn new() -> Self {
98        OpenElementsStack {
99            items: Vec::with_capacity(16),
100            template_element_count: 0,
101        }
102    }
103
104    pub fn push(&mut self, node: RcNode) {
105        if is_html_element!(node, "template") {
106            self.template_element_count += 1;
107        }
108
109        self.items.push(node);
110    }
111
112    pub fn pop(&mut self) -> Option<RcNode> {
113        let popped = self.items.pop();
114
115        if let Some(node) = &popped {
116            if is_html_element!(node, "template") {
117                self.template_element_count -= 1;
118            }
119        }
120
121        popped
122    }
123
124    pub fn insert(&mut self, index: usize, node: RcNode) {
125        if is_html_element!(node, "template") {
126            self.template_element_count += 1;
127        }
128
129        self.items.insert(index, node);
130    }
131
132    pub fn replace(&mut self, index: usize, node: RcNode) {
133        if let Some(item) = self.items.get(index) {
134            if is_html_element!(item, "template") {
135                self.template_element_count -= 1;
136            }
137
138            if is_html_element!(node, "template") {
139                self.template_element_count += 1;
140            }
141
142            self.items[index] = node;
143        }
144    }
145
146    pub fn remove(&mut self, node: &RcNode) {
147        let position = self.items.iter().rposition(|x| is_same_node(node, x));
148
149        if let Some(position) = position {
150            if is_html_element!(node, "template") {
151                self.template_element_count -= 1;
152            }
153
154            self.items.remove(position);
155        }
156    }
157
158    pub fn contains_template_element(&self) -> bool {
159        self.template_element_count > 0
160    }
161
162    // The stack of open elements is said to have an element target node in a
163    // specific scope consisting of a list of element types list when the following
164    // algorithm terminates in a match state:
165    fn has_element_target_node_in_specific_scope(
166        &self,
167        tag_name: &str,
168        list: &[(&str, Namespace)],
169    ) -> bool {
170        let mut iter = self.items.iter().rev();
171        // 1. Initialize node to be the current node (the bottommost node of the stack).
172        let mut node = iter.next();
173
174        while let Some(inner_node) = node {
175            // 2. If node is the target node, terminate in a match state.
176            if get_tag_name!(inner_node) == tag_name
177                && get_namespace!(inner_node) == Namespace::HTML
178            {
179                return true;
180            }
181
182            // 3. Otherwise, if node is one of the element types in list, terminate in a
183            // failure state.
184            for element_and_ns in list {
185                if get_tag_name!(inner_node) == element_and_ns.0
186                    && get_namespace!(inner_node) == element_and_ns.1
187                {
188                    return false;
189                }
190            }
191
192            // 4. Otherwise, set node to the previous entry in the stack of open elements
193            // and return to step 2. (This will never fail, since the loop will always
194            // terminate in the previous step if the top of the stack — an html element — is
195            // reached.)
196            node = iter.next();
197        }
198
199        false
200    }
201
202    // The stack of open elements is said to have a particular element in scope when
203    // it has that element in the specific scope consisting of the following element
204    // types:
205    //
206    // applet
207    // caption
208    // html
209    // table
210    // td
211    // th
212    // marquee
213    // object
214    // template
215    // MathML mi
216    // MathML mo
217    // MathML mn
218    // MathML ms
219    // MathML mtext
220    // MathML annotation-xml
221    // SVG foreignObject
222    // SVG desc
223    // SVG title
224    pub fn has_in_scope(&self, tag_name: &str) -> bool {
225        self.has_element_target_node_in_specific_scope(tag_name, SPECIFIC_SCOPE)
226    }
227
228    pub fn has_node_in_scope(&self, target: &RcNode) -> bool {
229        let mut iter = self.items.iter().rev();
230        // 1. Initialize node to be the current node (the bottommost node of the stack).
231        let mut node = iter.next();
232
233        while let Some(inner_node) = node {
234            // 2. If node is the target node, terminate in a match state.
235            if is_same_node(target, inner_node) {
236                return true;
237            }
238
239            // 3. Otherwise, if node is one of the element types in list, terminate in a
240            // failure state.
241            for element_and_ns in SPECIFIC_SCOPE {
242                if get_tag_name!(inner_node) == element_and_ns.0
243                    && get_namespace!(inner_node) == element_and_ns.1
244                {
245                    return false;
246                }
247            }
248
249            // 4. Otherwise, set node to the previous entry in the stack of open elements
250            // and return to step 2. (This will never fail, since the loop will always
251            // terminate in the previous step if the top of the stack — an html element — is
252            // reached.)
253            node = iter.next();
254        }
255
256        false
257    }
258
259    // The stack of open elements is said to have a particular element in list item
260    // scope when it has that element in the specific scope consisting of the
261    // following element types:
262    //
263    // All the element types listed above for the has an element in scope algorithm.
264    // ol in the HTML namespace
265    // ul in the HTML namespace
266    pub fn has_in_list_item_scope(&self, tag_name: &str) -> bool {
267        self.has_element_target_node_in_specific_scope(tag_name, LIST_ITEM_SCOPE)
268    }
269
270    // The stack of open elements is said to have a particular element in button
271    // scope when it has that element in the specific scope consisting of the
272    // following element types:
273    //
274    // All the element types listed above for the has an element in scope algorithm.
275    // button in the HTML namespace
276    pub fn has_in_button_scope(&self, tag_name: &str) -> bool {
277        self.has_element_target_node_in_specific_scope(tag_name, BUTTON_SCOPE)
278    }
279
280    // The stack of open elements is said to have a particular element in table
281    // scope when it has that element in the specific scope consisting of the
282    // following element types:
283    //
284    // html in the HTML namespace
285    // table in the HTML namespace
286    // template in the HTML namespace
287    pub fn has_in_table_scope(&self, tag_name: &str) -> bool {
288        self.has_element_target_node_in_specific_scope(tag_name, TABLE_SCOPE)
289    }
290
291    // The stack of open elements is said to have a particular element in select
292    // scope when it has that element in the specific scope consisting of all
293    // element types except the following:
294    //
295    // optgroup in the HTML namespace
296    // option in the HTML namespace
297    pub fn has_in_select_scope(&self, tag_name: &str) -> bool {
298        let mut iter = self.items.iter().rev();
299        // 1. Initialize node to be the current node (the bottommost node of the stack).
300        let mut node = iter.next();
301
302        while let Some(inner_node) = node {
303            // 2. If node is the target node, terminate in a match state.
304            if get_tag_name!(inner_node) == tag_name
305                && get_namespace!(inner_node) == Namespace::HTML
306            {
307                return true;
308            }
309
310            // 3. Otherwise, if node is one of the element types in list, terminate in a
311            // failure state.
312            if SELECT_SCOPE.iter().all(|(tag_name, namespace)| {
313                get_tag_name!(inner_node) != *tag_name && get_namespace!(inner_node) != *namespace
314            }) {
315                return false;
316            }
317
318            // 4. Otherwise, set node to the previous entry in the stack of open elements
319            // and return to step 2. (This will never fail, since the loop will always
320            // terminate in the previous step if the top of the stack — an html element — is
321            // reached.)
322            node = iter.next();
323        }
324
325        false
326    }
327
328    // When the steps above require the UA to clear the stack back to a table
329    // context, it means that the UA must, while the current node is not a table,
330    // template, or html element, pop elements from the stack of open elements.
331    pub fn clear_back_to_table_context(&mut self) {
332        while let Some(node) = self.items.last() {
333            if !is_html_element!(node, "table" | "template" | "html") {
334                self.pop();
335            } else {
336                break;
337            }
338        }
339    }
340
341    // When the steps above require the UA to clear the stack back to a table row
342    // context, it means that the UA must, while the current node is not a tr,
343    // template, or html element, pop elements from the stack of open elements.
344    pub fn clear_back_to_table_row_context(&mut self) {
345        while let Some(node) = self.items.last() {
346            if !is_html_element!(node, "tr" | "template" | "html") {
347                self.pop();
348            } else {
349                break;
350            }
351        }
352    }
353
354    // When the steps above require the UA to clear the stack back to a table body
355    // context, it means that the UA must, while the current node is not a tbody,
356    // tfoot, thead, template, or html element, pop elements from the stack of open
357    // elements.
358    pub fn clear_back_to_table_body_context(&mut self) {
359        while let Some(node) = self.items.last() {
360            if !is_html_element!(node, "thead" | "tfoot" | "tbody" | "template" | "html") {
361                self.pop();
362            } else {
363                break;
364            }
365        }
366    }
367
368    // When the steps below require the UA to generate implied end tags, then, while
369    // the current node is a dd element, a dt element, an li element, an optgroup
370    // element, an option element, a p element, an rb element, an rp element, an rt
371    // element, or an rtc element, the UA must pop the current node off the stack of
372    // open elements.
373    //
374    // If a step requires the UA to generate implied end tags but lists an element
375    // to exclude from the process, then the UA must perform the above steps as if
376    // that element was not in the above list.
377    pub fn generate_implied_end_tags(&mut self) {
378        while let Some(node) = self.items.last() {
379            if IMPLICIT_END_TAG_REQUIRED.contains(&get_tag_name!(node))
380                && get_namespace!(node) == Namespace::HTML
381            {
382                self.pop();
383            } else {
384                break;
385            }
386        }
387    }
388
389    pub fn generate_implied_end_tags_with_exclusion(&mut self, tag_name: &str) {
390        while let Some(node) = self.items.last() {
391            if is_html_element_with_tag_name!(node, tag_name) {
392                break;
393            }
394
395            if IMPLICIT_END_TAG_REQUIRED.contains(&get_tag_name!(node))
396                && get_namespace!(node) == Namespace::HTML
397            {
398                self.pop();
399            } else {
400                break;
401            }
402        }
403    }
404
405    // When the steps below require the UA to generate all implied end tags
406    // thoroughly, then, while the current node is a caption element, a colgroup
407    // element, a dd element, a dt element, an li element, an optgroup element, an
408    // option element, a p element, an rb element, an rp element, an rt element, an
409    // rtc element, a tbody element, a td element, a tfoot element, a th element, a
410    // thead element, or a tr element, the UA must pop the current node off the
411    // stack of open elements.
412    pub fn generate_implied_end_tags_thoroughly(&mut self) {
413        while let Some(node) = self.items.last() {
414            if IMPLICIT_END_TAG_REQUIRED_THOROUGHLY.contains(&get_tag_name!(node))
415                && get_namespace!(node) == Namespace::HTML
416            {
417                self.pop();
418            } else {
419                break;
420            }
421        }
422    }
423
424    pub fn pop_until_tag_name_popped(&mut self, tag_name: &[&str]) -> Option<RcNode> {
425        while let Some(node) = self.pop() {
426            if tag_name.contains(&get_tag_name!(node)) && get_namespace!(node) == Namespace::HTML {
427                return Some(node);
428            }
429        }
430
431        None
432    }
433
434    pub fn pop_until_node(&mut self, until_to_node: &RcNode) -> Option<RcNode> {
435        while let Some(node) = self.pop() {
436            if is_same_node(&node, until_to_node) {
437                return Some(node);
438            }
439        }
440
441        None
442    }
443
444    // While the current node is not a MathML text integration point, an HTML
445    // integration point, or an element in the HTML namespace, pop elements from
446    // the stack of open elements.
447    pub fn pop_until_in_foreign(&mut self) {
448        while let Some(node) = self.items.last() {
449            match &node.data {
450                Data::Element { namespace, .. } if *namespace == Namespace::HTML => {
451                    break;
452                }
453                _ if is_mathml_text_integration_point(Some(node))
454                    || is_html_integration_point(Some(node)) =>
455                {
456                    break;
457                }
458                _ => {}
459            }
460
461            self.pop();
462        }
463    }
464}