swc_html_parser/parser/mod.rs
1use std::{cell::RefCell, mem, rc::Rc};
2
3use active_formatting_element_stack::*;
4use doctypes::*;
5use node::*;
6use open_elements_stack::*;
7use swc_atoms::{atom, Atom};
8use swc_common::{Span, DUMMY_SP};
9use swc_html_ast::*;
10
11use self::input::{Buffer, ParserInput};
12use crate::{
13 error::{Error, ErrorKind},
14 lexer::State,
15};
16
17#[macro_use]
18mod macros;
19mod active_formatting_element_stack;
20mod doctypes;
21pub mod input;
22mod node;
23mod open_elements_stack;
24
/// Result alias used by the parser: the success value is `T`, the failure
/// value is the crate's [`Error`].
pub type PResult<T> = Result<T, Error>;
26
/// Options controlling how the HTML parser behaves.
///
/// Every option defaults to `false`.
#[derive(Clone, Copy, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ParserConfig {
    /// The scripting flag. When parsing a fragment whose context element is
    /// `noscript`, an enabled flag switches the tokenizer to the RAWTEXT state.
    pub scripting_enabled: bool,
    /// NOTE(review): not read in the visible part of this file; presumably
    /// marks the input as an `iframe` `srcdoc` document — confirm.
    pub iframe_srcdoc: bool,
    /// Lets non-void HTML elements self-close (`<div />`) by processing the
    /// matching end tag right after the self-closing start tag. See #8459.
    pub allow_self_closing: bool,
}
34
/// A position marker expressed relative to a node of type `N`.
///
/// NOTE(review): the users of this type are outside the visible part of the
/// file; presumably this is the bookmark of the adoption agency algorithm —
/// confirm.
enum Bookmark<N> {
    /// The marked position is the node itself, which is to be replaced.
    Replace(N),
    /// The marked position is immediately after the node.
    InsertAfter(N),
}
39
/// Selects which attribute-name adjustment is applied when a foreign element
/// is inserted.
enum AdjustAttributes {
    /// Chosen when the adjusted current node is in the MathML namespace.
    MathML,
    /// Chosen when the adjusted current node is in the SVG namespace.
    Svg,
}
44
/// The insertion modes of the HTML tree construction stage.
///
/// The parser starts in [`InsertionMode::Initial`].
#[derive(Clone, Debug, Default)]
enum InsertionMode {
    /// The "initial" insertion mode (the default).
    #[default]
    Initial,
    /// The "before html" insertion mode.
    BeforeHtml,
    /// The "before head" insertion mode.
    BeforeHead,
    /// The "in head" insertion mode.
    InHead,
    /// The "in head noscript" insertion mode.
    InHeadNoScript,
    /// The "after head" insertion mode.
    AfterHead,
    /// The "in body" insertion mode.
    InBody,
    /// The "text" insertion mode.
    Text,
    /// The "in table" insertion mode.
    InTable,
    /// The "in table text" insertion mode.
    InTableText,
    /// The "in caption" insertion mode.
    InCaption,
    /// The "in column group" insertion mode.
    InColumnGroup,
    /// The "in table body" insertion mode.
    InTableBody,
    /// The "in row" insertion mode.
    InRow,
    /// The "in cell" insertion mode.
    InCell,
    /// The "in select" insertion mode.
    InSelect,
    /// The "in select in table" insertion mode.
    InSelectInTable,
    /// The "in template" insertion mode.
    InTemplate,
    /// The "after body" insertion mode.
    AfterBody,
    /// The "in frameset" insertion mode.
    InFrameset,
    /// The "after frameset" insertion mode.
    AfterFrameset,
    /// The "after after body" insertion mode.
    AfterAfterBody,
    /// The "after after frameset" insertion mode.
    AfterAfterFrameset,
}
72
/// Where a new node is to be placed in the tree.
///
/// NOTE(review): the consumers of this type are outside the visible part of
/// the file; the variant meanings below are read off their names — confirm.
enum InsertionPosition {
    /// Insert as the last child of the given node.
    LastChild(RcNode),
    /// Insert immediately before the given sibling node.
    BeforeSibling(RcNode),
}
77
78pub struct Parser<I>
79where
80 I: ParserInput,
81{
82 #[allow(dead_code)]
83 config: ParserConfig,
84 input: Buffer<I>,
85 stopped: bool,
86 is_fragment_case: bool,
87 context_element: Option<RcNode>,
88 insertion_mode: InsertionMode,
89 original_insertion_mode: InsertionMode,
90 template_insertion_mode_stack: Vec<InsertionMode>,
91 document: Option<RcNode>,
92 head_element_pointer: Option<RcNode>,
93 form_element_pointer: Option<RcNode>,
94 open_elements_stack: OpenElementsStack,
95 active_formatting_elements: ActiveFormattingElementStack,
96 pending_character_tokens: Vec<TokenAndInfo>,
97 frameset_ok: bool,
98 foster_parenting_enabled: bool,
99 errors: Vec<Error>,
100}
101
102impl<I> Parser<I>
103where
104 I: ParserInput,
105{
106 pub fn new(input: I, config: ParserConfig) -> Self {
107 Parser {
108 config,
109 input: Buffer::new(input),
110 stopped: false,
111 is_fragment_case: false,
112 context_element: None,
113 insertion_mode: Default::default(),
114 original_insertion_mode: Default::default(),
115 template_insertion_mode_stack: Vec::with_capacity(16),
116 document: None,
117 head_element_pointer: None,
118 form_element_pointer: None,
119 open_elements_stack: OpenElementsStack::new(),
120 active_formatting_elements: ActiveFormattingElementStack::new(),
121 pending_character_tokens: Vec::with_capacity(16),
122 frameset_ok: true,
123 foster_parenting_enabled: false,
124 errors: Default::default(),
125 }
126 }
127
128 pub fn dump_cur(&mut self) -> String {
129 format!("{:?}", self.input.cur())
130 }
131
/// Returns every error recorded so far and leaves the parser's error list
/// empty.
pub fn take_errors(&mut self) -> Vec<Error> {
    mem::take(&mut self.errors)
}
135
136 pub fn parse_document(&mut self) -> PResult<Document> {
137 let start = self.input.cur_span()?;
138
139 self.document = Some(self.create_document(None));
140
141 self.run()?;
142
143 let document = &mut self.document.take().unwrap();
144 let nodes = document.children.take();
145 let mut children = Vec::with_capacity(nodes.len());
146
147 for node in nodes {
148 children.push(self.node_to_child(node));
149 }
150
151 let last = self.input.last_pos()?;
152 let mode = match &document.data {
153 Data::Document { mode, .. } => *mode.borrow(),
154 _ => {
155 unreachable!();
156 }
157 };
158
159 Ok(Document {
160 span: Span::new(start.lo(), last),
161 mode,
162 children,
163 })
164 }
165
166 // The following steps form the HTML fragment parsing algorithm. The algorithm
167 // takes as input an Element node, referred to as the context element, which
168 // gives the context for the parser, as well as input, a string to parse, and
169 // returns a list of zero or more nodes.
170 //
171 // Parts marked fragment case in algorithms in the parser section are parts that
172 // only occur if the parser was created for the purposes of this algorithm. The
173 // algorithms have been annotated with such markings for informational purposes
174 // only; such markings have no normative weight. If it is possible for a
175 // condition described as a fragment case to occur even when the parser wasn't
176 // created for the purposes of handling this algorithm, then that is an error in
177 // the specification.
178 //
179 // 1. Create a new Document node, and mark it as being an HTML document.
180 //
// 2. If the node document of the context element is in quirks mode, then let
// the Document be in quirks mode. Otherwise, if the node document of the
// context element is in limited-quirks mode, then let the Document be in
// limited-quirks mode. Otherwise, leave the Document in no-quirks mode.
185 //
186 // 3. Create a new HTML parser, and associate it with the just created Document
187 // node.
188 //
189 // 4. Set the state of the HTML parser's tokenization stage as follows,
190 // switching on the context element:
191 //
192 // title
193 // textarea
194 //
195 // Switch the tokenizer to the RCDATA state.
196 //
197 // style
198 // xmp
199 // iframe
200 // noembed
201 // noframes
202 //
203 // Switch the tokenizer to the RAWTEXT state.
204 //
205 // script
206 //
207 // Switch the tokenizer to the script data state.
208 //
209 // noscript
210 //
// If the scripting flag is enabled, switch the tokenizer to the RAWTEXT state.
// Otherwise, leave the tokenizer in the data state.
//
// plaintext
//
// Switch the tokenizer to the PLAINTEXT state.
215 //
216 // Any other element
217 //
218 // Leave the tokenizer in the data state.
219 //
220 // For performance reasons, an implementation that does not report errors and
221 // that uses the actual state machine described in this specification directly
222 // could use the PLAINTEXT state instead of the RAWTEXT and script data states
223 // where those are mentioned in the list above. Except for rules regarding parse
224 // errors, they are equivalent, since there is no appropriate end tag token in
225 // the fragment case, yet they involve far fewer state transitions.
226 //
227 // 5. Let root be a new html element with no attributes.
228 //
229 // 6. Append the element root to the Document node created above.
230 //
231 // 7. Set up the parser's stack of open elements so that it contains just the
232 // single element root.
233 //
234 // 8. If the context element is a template element, push "in template" onto the
235 // stack of template insertion modes so that it is the new current template
236 // insertion mode.
237 //
238 // 9. Create a start tag token whose name is the local name of context and whose
239 // attributes are the attributes of context.
240 //
241 // Let this start tag token be the start tag token of the context node, e.g. for
242 // the purposes of determining if it is an HTML integration point.
243 //
244 // 10. Reset the parser's insertion mode appropriately.
245 //
246 // The parser will reference the context element as part of that algorithm.
247 //
248 // 11. Set the parser's form element pointer to the nearest node to the context
249 // element that is a form element (going straight up the ancestor chain, and
250 // including the element itself, if it is a form element), if any. (If there is
251 // no such form element, the form element pointer keeps its initial value,
252 // null.)
253 //
254 // 12. Place the input into the input stream for the HTML parser just created.
255 // The encoding confidence is irrelevant.
256 //
257 // 13. Start the parser and let it run until it has consumed all the characters
258 // just inserted into the input stream.
259 //
260 // 14. Return the child nodes of root, in tree order.
261 pub fn parse_document_fragment(
262 &mut self,
263 context_element: Element,
264 mode: DocumentMode,
265 form_element: Option<Element>,
266 ) -> PResult<DocumentFragment> {
267 // 1.
268 // 2.
269 self.document = Some(self.create_document(Some(mode)));
270
271 // 3.
272 // Parser already created
273 let context_node = Node::new(
274 Data::Element {
275 namespace: context_element.namespace,
276 tag_name: context_element.tag_name,
277 attributes: RefCell::new(context_element.attributes),
278 is_self_closing: context_element.is_self_closing,
279 },
280 DUMMY_SP,
281 );
282
283 // 4.
284 match get_tag_name!(context_node) {
285 "title" | "textarea" if get_namespace!(context_node) == Namespace::HTML => {
286 self.input.set_input_state(State::Rcdata);
287 }
288 "style" | "xmp" | "iframe" | "noembed" | "noframes"
289 if get_namespace!(context_node) == Namespace::HTML =>
290 {
291 self.input.set_input_state(State::Rawtext);
292 }
293 "script" if get_namespace!(context_node) == Namespace::HTML => {
294 self.input.set_input_state(State::ScriptData);
295 }
296 "noscript" if get_namespace!(context_node) == Namespace::HTML => {
297 if self.config.scripting_enabled {
298 self.input.set_input_state(State::Rawtext);
299 } else {
300 self.input.set_input_state(State::Data)
301 }
302 }
303 "plaintext" if get_namespace!(context_node) == Namespace::HTML => {
304 self.input.set_input_state(State::PlainText)
305 }
306 _ => self.input.set_input_state(State::Data),
307 }
308
309 // 5.
310 let root = self.create_fake_html_element();
311
312 // 6.
313 self.append_node(self.document.as_ref().unwrap(), root.clone());
314
315 // 7.
316 self.open_elements_stack.push(root.clone());
317
318 // 8.
319 if is_html_element!(context_node, "template") {
320 self.template_insertion_mode_stack
321 .push(InsertionMode::InTemplate);
322 }
323
324 // 9.
325 self.context_element = Some(context_node.clone());
326 self.is_fragment_case = true;
327
328 // 10.
329 self.reset_insertion_mode();
330
331 // 11.
332 if is_html_element!(context_node, "form") {
333 self.form_element_pointer = Some(context_node);
334 } else if let Some(form_element) = form_element {
335 self.form_element_pointer = Some(Node::new(
336 Data::Element {
337 namespace: form_element.namespace,
338 tag_name: form_element.tag_name,
339 attributes: RefCell::new(form_element.attributes),
340 is_self_closing: form_element.is_self_closing,
341 },
342 DUMMY_SP,
343 ));
344 }
345
346 // 12.
347 // We do preprocess input stream inside lexer
348
349 // 13.
350 let start = self.input.cur_span()?;
351
352 self.run()?;
353
354 let nodes = root.children.take();
355 let mut children = Vec::with_capacity(nodes.len());
356
357 for node in nodes {
358 children.push(self.node_to_child(node));
359 }
360
361 let last = self.input.last_pos()?;
362
363 Ok(DocumentFragment {
364 span: Span::new(start.lo(), last),
365 children,
366 })
367 }
368
369 fn create_document(&self, mode: Option<DocumentMode>) -> RcNode {
370 Node::new(
371 Data::Document {
372 mode: RefCell::new(mode.unwrap_or(DocumentMode::NoQuirks)),
373 },
374 DUMMY_SP,
375 )
376 }
377
378 #[allow(clippy::only_used_in_recursion)]
379 fn get_deep_end_span(&mut self, children: &[Child]) -> Option<Span> {
380 match children.last() {
381 Some(Child::DocumentType(DocumentType { span, .. })) => Some(*span),
382 Some(Child::Element(Element { span, children, .. })) => {
383 if span.is_dummy() {
384 return self.get_deep_end_span(children);
385 }
386
387 Some(*span)
388 }
389 Some(Child::Comment(Comment { span, .. })) => Some(*span),
390 Some(Child::Text(Text { span, .. })) => Some(*span),
391 _ => None,
392 }
393 }
394
395 fn node_to_child(&mut self, node: RcNode) -> Child {
396 let start_span = node.start_span.take();
397
398 match node.data.clone() {
399 Data::DocumentType {
400 name,
401 public_id,
402 system_id,
403 raw,
404 } => Child::DocumentType(DocumentType {
405 span: start_span,
406 name,
407 public_id,
408 system_id,
409 raw,
410 }),
411 Data::Element {
412 namespace,
413 tag_name,
414 attributes,
415 is_self_closing,
416 } => {
417 let nodes = node.children.take();
418 let mut new_children = Vec::with_capacity(nodes.len());
419
420 for node in nodes {
421 new_children.push(self.node_to_child(node));
422 }
423
424 let attributes = attributes.take();
425
426 match &*tag_name {
427 "html" | "body" if namespace == Namespace::HTML => {
428 // Elements and text after `</html>` are moving into `<body>`
429 // Elements and text after `</body>` are moving into `<body>`
430 let span = if start_span.is_dummy() {
431 start_span
432 } else {
433 let end_body = match node.end_span.take() {
434 Some(end_tag_span) => end_tag_span,
435 _ => start_span,
436 };
437 let end_children = match self.get_deep_end_span(&new_children) {
438 Some(end_span) => end_span,
439 _ => start_span,
440 };
441
442 let end = if end_body.hi() >= end_children.hi() {
443 end_body
444 } else {
445 end_children
446 };
447
448 Span::new(start_span.lo(), end.hi())
449 };
450
451 Child::Element(Element {
452 span,
453 namespace,
454 tag_name,
455 attributes,
456 is_self_closing,
457 children: new_children,
458 content: None,
459 })
460 }
461 _ => {
462 let span = if start_span.is_dummy() {
463 start_span
464 } else {
465 let end_span = match node.end_span.take() {
466 Some(end_span) if !end_span.is_dummy() => end_span,
467 _ => match self.get_deep_end_span(&new_children) {
468 Some(end_span) => end_span,
469 _ => start_span,
470 },
471 };
472
473 Span::new(start_span.lo(), end_span.hi())
474 };
475 let (children, content) =
476 if namespace == Namespace::HTML && &tag_name == "template" {
477 (
478 Vec::new(),
479 Some(DocumentFragment {
480 span,
481 children: new_children,
482 }),
483 )
484 } else {
485 (new_children, None)
486 };
487
488 Child::Element(Element {
489 span,
490 namespace,
491 tag_name,
492 attributes,
493 is_self_closing,
494 children,
495 content,
496 })
497 }
498 }
499 }
500 Data::Text { data, raw } => {
501 let span = if let Some(end_span) = node.end_span.take() {
502 Span::new(start_span.lo(), end_span.hi())
503 } else {
504 start_span
505 };
506
507 Child::Text(Text {
508 span,
509 data: data.take().into(),
510 raw: Some(raw.take().into()),
511 })
512 }
513 Data::Comment { data, raw } => Child::Comment(Comment {
514 span: start_span,
515 data,
516 raw,
517 }),
518 _ => {
519 unreachable!();
520 }
521 }
522 }
523
524 fn run(&mut self) -> PResult<()> {
525 while !self.stopped {
526 let adjusted_current_node = self.get_adjusted_current_node();
527 let is_element_in_html_namespace = is_element_in_html_namespace(adjusted_current_node);
528
529 self.input
530 .set_adjusted_current_node_to_html_namespace(is_element_in_html_namespace);
531
532 let mut token_and_info = match self.input.cur()? {
533 Some(_) => {
534 let span = self.input.cur_span()?;
535 let token = bump!(self);
536
537 TokenAndInfo {
538 span: span!(self, span.lo()),
539 acknowledged: false,
540 token,
541 }
542 }
543 None => {
544 let start_pos = self.input.start_pos()?;
545 let last_pos = self.input.last_pos()?;
546
547 TokenAndInfo {
548 span: Span::new(start_pos, last_pos),
549 acknowledged: false,
550 token: Token::Eof,
551 }
552 }
553 };
554
555 // Re-emit errors from tokenizer
556 for error in self.input.take_errors() {
557 let (span, kind) = *error.into_inner();
558
559 self.errors.push(Error::new(span, kind));
560 }
561
562 self.tree_construction_dispatcher(&mut token_and_info)?;
563
564 // When a start tag token is emitted with its self-closing flag set,
565 // if the flag is not acknowledged when it is processed by the tree
566 // construction stage, that is a parse error.
567 if let Token::StartTag {
568 is_self_closing, ..
569 } = &token_and_info.token
570 {
571 if *is_self_closing && !token_and_info.acknowledged {
572 self.errors.push(Error::new(
573 token_and_info.span,
574 ErrorKind::NonVoidHtmlElementStartTagWithTrailingSolidus,
575 ));
576 }
577 }
578 }
579
580 Ok(())
581 }
582
583 fn tree_construction_dispatcher(&mut self, token_and_info: &mut TokenAndInfo) -> PResult<()> {
584 // As each token is emitted from the tokenizer, the user agent must follow the
585 // appropriate steps from the following list, known as the tree construction
586 // dispatcher:
587 //
588 // If the stack of open elements is empty
589 //
590 // If the adjusted current node is an element in the HTML namespace
591 //
592 // If the adjusted current node is a MathML text integration point and the token
593 // is a start tag whose tag name is neither "mglyph" nor "malignmark"
594 //
595 // If the adjusted current node is a MathML text integration point and the token
596 // is a character token
597 //
598 // If the adjusted current node is a MathML annotation-xml element and the token
599 // is a start tag whose tag name is "svg"
600 //
601 // If the adjusted current node is an HTML integration point and the token is a
602 // start tag
603 //
604 // If the adjusted current node is an HTML integration point and the token is a
605 // character token
606 //
607 // If the token is an end-of-file token
608 //
609 // Process the token according to the rules given in the section corresponding
610 // to the current insertion mode in HTML content.
611 let adjusted_current_node = self.get_adjusted_current_node();
612
613 let is_element_in_html_namespace = is_element_in_html_namespace(adjusted_current_node);
614 let is_mathml_text_integration_point =
615 is_mathml_text_integration_point(adjusted_current_node);
616 let is_mathml_annotation_xml = is_mathml_annotation_xml(adjusted_current_node);
617 let is_html_integration_point = is_html_integration_point(adjusted_current_node);
618
619 if self.open_elements_stack.items.is_empty()
620 || is_element_in_html_namespace
621 || (is_mathml_text_integration_point
622 && matches!(&token_and_info.token, Token::StartTag { tag_name, .. } if *tag_name != "mglyph" && *tag_name != "malignmark"))
623 || (is_mathml_text_integration_point
624 && matches!(&token_and_info.token, Token::Character { .. }))
625 || (is_mathml_annotation_xml
626 && matches!(&token_and_info.token, Token::StartTag { tag_name, .. } if tag_name == "svg"))
627 || (is_html_integration_point
628 && matches!(&token_and_info.token, Token::StartTag { .. }))
629 || (is_html_integration_point
630 && matches!(&token_and_info.token, Token::Character { .. }))
631 || matches!(&token_and_info.token, Token::Eof)
632 {
633 self.process_token(token_and_info, None)?;
634 }
635 // Otherwise
636 // Process the token according to the rules given in the section for parsing tokens in
637 // foreign content.
638 else {
639 self.process_token_in_foreign_content(token_and_info)?;
640 }
641
642 Ok(())
643 }
644
645 // The adjusted current node is the context element if the parser was created as
646 // part of the HTML fragment parsing algorithm and the stack of open elements
647 // has only one element in it (fragment case); otherwise, the adjusted current
648 // node is the current node.
649 fn get_adjusted_current_node(&self) -> Option<&RcNode> {
650 if self.is_fragment_case && self.open_elements_stack.items.len() == 1 {
651 return self.context_element.as_ref();
652 }
653
654 self.open_elements_stack.items.last()
655 }
656
657 fn process_token_in_foreign_content(
658 &mut self,
659 token_and_info: &mut TokenAndInfo,
660 ) -> PResult<()> {
661 let TokenAndInfo { token, .. } = &token_and_info;
662
663 match token {
664 // A character token that is U+0000 NULL
665 //
666 // Parse error. Insert a U+FFFD REPLACEMENT CHARACTER character.
667 Token::Character { value, .. } if *value == '\x00' => {
668 self.errors.push(Error::new(
669 token_and_info.span,
670 ErrorKind::UnexpectedNullCharacter,
671 ));
672
673 token_and_info.token = Token::Character {
674 value: '\u{FFFD}',
675 raw: Some(Raw::Atom(Atom::new(String::from('\x00')))),
676 };
677
678 println!("{:?}", token_and_info.token);
679
680 self.insert_character(token_and_info)?;
681 }
682 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
683 // U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
684 //
685 // Insert the token's character.
686 Token::Character {
687 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
688 ..
689 } => {
690 self.insert_character(token_and_info)?;
691 }
692 // Any other character token
693 //
694 // Insert the token's character.
695 //
696 // Set the frameset-ok flag to "not ok".
697 Token::Character { .. } => {
698 self.insert_character(token_and_info)?;
699
700 self.frameset_ok = false;
701 }
702 // A comment token
703 //
704 // Insert a comment.
705 Token::Comment { .. } => {
706 self.insert_comment(token_and_info)?;
707 }
708 // A DOCTYPE token
709 // Parse error. Ignore the token.
710 Token::Doctype { .. } => {
711 self.errors
712 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
713 }
714 // A start tag whose tag name is one of: "b", "big", "blockquote", "body", "br",
715 // "center", "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4",
716 // "h5", "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol",
717 // "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup", "table",
718 // "tt", "u", "ul", "var"
719 //
720 // A start tag whose tag name is "font", if the token has any attributes named "color",
721 // "face", or "size"
722 //
723 // An end tag whose tag name is "br", "p"
724 //
725 // Parse error.
726 //
727 // While the current node is not a MathML text integration point, an HTML integration
728 // point, or an element in the HTML namespace, pop elements from the stack of open
729 // elements.
730 //
731 // Reprocess the token according to the rules given in the section corresponding to the
732 // current insertion mode in HTML content.
733 Token::StartTag { tag_name, .. }
734 if matches!(
735 &**tag_name,
736 "b" | "big"
737 | "blockquote"
738 | "body"
739 | "br"
740 | "center"
741 | "code"
742 | "dd"
743 | "div"
744 | "dl"
745 | "dt"
746 | "em"
747 | "embed"
748 | "h1"
749 | "h2"
750 | "h3"
751 | "h4"
752 | "h5"
753 | "h6"
754 | "head"
755 | "hr"
756 | "i"
757 | "img"
758 | "li"
759 | "listing"
760 | "menu"
761 | "meta"
762 | "nobr"
763 | "ol"
764 | "p"
765 | "pre"
766 | "ruby"
767 | "s"
768 | "small"
769 | "span"
770 | "strong"
771 | "strike"
772 | "sub"
773 | "sup"
774 | "table"
775 | "tt"
776 | "u"
777 | "ul"
778 | "var"
779 ) =>
780 {
781 self.errors.push(Error::new(
782 token_and_info.span,
783 ErrorKind::HtmlStartTagInForeignContext(tag_name.clone()),
784 ));
785 self.open_elements_stack.pop_until_in_foreign();
786 self.process_token(token_and_info, None)?;
787 }
788 Token::StartTag {
789 tag_name,
790 attributes,
791 ..
792 } if tag_name == "font"
793 && attributes
794 .iter()
795 .any(|attribute| matches!(&*attribute.name, "color" | "face" | "size")) =>
796 {
797 self.errors.push(Error::new(
798 token_and_info.span,
799 ErrorKind::HtmlStartTagInForeignContext(tag_name.clone()),
800 ));
801 self.open_elements_stack.pop_until_in_foreign();
802 self.process_token(token_and_info, None)?;
803 }
804 Token::EndTag { tag_name, .. } if matches!(&**tag_name, "br" | "p") => {
805 let last = get_tag_name!(self.open_elements_stack.items.last().unwrap());
806
807 self.errors.push(Error::new(
808 token_and_info.span,
809 ErrorKind::EndTagDidNotMatchCurrentOpenElement(tag_name.clone(), last.into()),
810 ));
811 self.open_elements_stack.pop_until_in_foreign();
812 self.process_token(token_and_info, None)?;
813 }
814 // Any other start tag
815 //
816 // If the adjusted current node is an element in the MathML namespace, adjust MathML
817 // attributes for the token. (This fixes the case of MathML attributes that are not all
818 // lowercase.)
819 //
820 // If the adjusted current node is an element in the SVG namespace, and the token's tag
821 // name is one of the ones in the first column of the following table, change the tag
822 // name to the name given in the corresponding cell in the second column. (This fixes
823 // the case of SVG elements that are not all lowercase.)
824 //
825 // Tag name Element name
826 // altglyph altGlyph
827 // altglyphdef altGlyphDef
828 // altglyphitem altGlyphItem
829 // animatecolor animateColor
830 // animatemotion animateMotion
831 // animatetransform animateTransform
832 // clippath clipPath
833 // feblend feBlend
834 // fecolormatrix feColorMatrix
835 // fecomponenttransfer feComponentTransfer
836 // fecomposite feComposite
837 // feconvolvematrix feConvolveMatrix
838 // fediffuselighting feDiffuseLighting
839 // fedisplacementmap feDisplacementMap
840 // fedistantlight feDistantLight
841 // fedropshadow feDropShadow
842 // feflood feFlood
843 // fefunca feFuncA
844 // fefuncb feFuncB
845 // fefuncg feFuncG
846 // fefuncr feFuncR
847 // fegaussianblur feGaussianBlur
848 // feimage feImage
849 // femerge feMerge
850 // femergenode feMergeNode
851 // femorphology feMorphology
852 // feoffset feOffset
853 // fepointlight fePointLight
854 // fespecularlighting feSpecularLighting
855 // fespotlight feSpotLight
856 // fetile feTile
857 // feturbulence feTurbulence
858 // foreignobject foreignObject
859 // glyphref glyphRef
860 // lineargradient linearGradient
861 // radialgradient radialGradient
862 // textpath textPath
863 //
864 // If the adjusted current node is an element in the SVG namespace, adjust SVG
865 // attributes for the token. (This fixes the case of SVG attributes that are not all
866 // lowercase.)
867 //
868 // Adjust foreign attributes for the token. (This fixes the use of namespaced
869 // attributes, in particular XLink in SVG.)
870 //
871 // Insert a foreign element for the token, in the same namespace as the adjusted current
872 // node.
873 //
874 // If the token has its self-closing flag set, then run the appropriate steps from the
875 // following list:
876 //
877 // If the token's tag name is "script", and the new current node is in the SVG
878 // namespace
879 //
880 // Acknowledge the token's self-closing flag, and then act as
881 // described in the steps for a "script" end tag below.
882 //
883 // Otherwise
884 // Pop the current node off the stack of open elements and acknowledge the token's
885 // self-closing flag.
886 Token::StartTag {
887 tag_name,
888 raw_tag_name,
889 is_self_closing,
890 attributes,
891 } => {
892 let is_self_closing = *is_self_closing;
893 let is_script = tag_name == "script";
894 let adjusted_current_node = self.get_adjusted_current_node();
895 let namespace = match adjusted_current_node {
896 Some(node) => {
897 get_namespace!(node)
898 }
899 _ => {
900 unreachable!();
901 }
902 };
903 let adjust_attributes = match namespace {
904 Namespace::MATHML => Some(AdjustAttributes::MathML),
905 Namespace::SVG => Some(AdjustAttributes::Svg),
906 _ => None,
907 };
908
909 if namespace == Namespace::SVG {
910 let new_tag_name = match &**tag_name {
911 "altglyph" => Some("altGlyph"),
912 "altglyphdef" => Some("altGlyphDef"),
913 "altglyphitem" => Some("altGlyphItem"),
914 "animatecolor" => Some("animateColor"),
915 "animatemotion" => Some("animateMotion"),
916 "animatetransform" => Some("animateTransform"),
917 "clippath" => Some("clipPath"),
918 "feblend" => Some("feBlend"),
919 "fecolormatrix" => Some("feColorMatrix"),
920 "fecomponenttransfer" => Some("feComponentTransfer"),
921 "fecomposite" => Some("feComposite"),
922 "feconvolvematrix" => Some("feConvolveMatrix"),
923 "fediffuselighting" => Some("feDiffuseLighting"),
924 "fedisplacementmap" => Some("feDisplacementMap"),
925 "fedistantlight" => Some("feDistantLight"),
926 "fedropshadow" => Some("feDropShadow"),
927 "feflood" => Some("feFlood"),
928 "fefunca" => Some("feFuncA"),
929 "fefuncb" => Some("feFuncB"),
930 "fefuncg" => Some("feFuncG"),
931 "fefuncr" => Some("feFuncR"),
932 "fegaussianblur" => Some("feGaussianBlur"),
933 "feimage" => Some("feImage"),
934 "femerge" => Some("feMerge"),
935 "femergenode" => Some("feMergeNode"),
936 "femorphology" => Some("feMorphology"),
937 "feoffset" => Some("feOffset"),
938 "fepointlight" => Some("fePointLight"),
939 "fespecularlighting" => Some("feSpecularLighting"),
940 "fespotlight" => Some("feSpotLight"),
941 "fetile" => Some("feTile"),
942 "feturbulence" => Some("feTurbulence"),
943 "foreignobject" => Some("foreignObject"),
944 "glyphref" => Some("glyphRef"),
945 "lineargradient" => Some("linearGradient"),
946 "radialgradient" => Some("radialGradient"),
947 "textpath" => Some("textPath"),
948 _ => None,
949 };
950
951 if let Some(new_tag_name) = new_tag_name {
952 token_and_info.token = Token::StartTag {
953 tag_name: new_tag_name.into(),
954 raw_tag_name: raw_tag_name.clone(),
955 is_self_closing,
956 attributes: attributes.clone(),
957 }
958 }
959 }
960
961 self.insert_foreign_element(token_and_info, namespace, adjust_attributes)?;
962
963 if is_self_closing {
964 if is_script
965 && match self.open_elements_stack.items.last() {
966 Some(node) => get_namespace!(node) == Namespace::SVG,
967 _ => false,
968 }
969 {
970 token_and_info.acknowledged = true;
971
972 self.open_elements_stack.pop();
973 } else {
974 self.open_elements_stack.pop();
975
976 token_and_info.acknowledged = true;
977 }
978 }
979 }
980 // An end tag whose tag name is "script", if the current node is an SVG script element
981 //
982 // Pop the current node off the stack of open elements.
983 //
984 // Let the old insertion point have the same value as the current insertion point. Let
985 // the insertion point be just before the next input character.
986 //
987 // Increment the parser's script nesting level by one. Set the parser pause flag to
988 // true.
989 //
990 // If the active speculative HTML parser is null and the user agent supports SVG, then
991 // Process the SVG script element according to the SVG rules. [SVG]
992 //
993 // Even if this causes new characters to be inserted into the tokenizer, the parser will
994 // not be executed reentrantly, since the parser pause flag is true.
995 //
996 // Decrement the parser's script nesting level by one. If the parser's script nesting
997 // level is zero, then set the parser pause flag to false.
998 //
999 // Let the insertion point have the value of the old insertion point. (In other words,
1000 // restore the insertion point to its previous value. This value might be the
1001 // "undefined" value.)
1002 Token::EndTag { tag_name, .. } if tag_name == "script" => {
1003 let popped = self.open_elements_stack.pop();
1004
1005 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
1006
1007 // No need to handle other steps
1008 }
1009 // Any other end tag
1010 //
1011 // Run these steps:
1012 //
1013 // Initialize node to be the current node (the bottommost node of the stack).
1014 //
1015 // If node's tag name, converted to ASCII lowercase, is not the same as the tag name of
1016 // the token, then this is a parse error.
1017 //
1018 // Loop: If node is the topmost element in the stack of open elements, then return.
1019 // (fragment case)
1020 //
1021 // If node's tag name, converted to ASCII lowercase, is the same as the tag name of the
1022 // token, pop elements from the stack of open elements until node has been popped from
1023 // the stack, and then return.
1024 //
1025 // Set node to the previous entry in the stack of open elements.
1026 //
1027 // If node is not an element in the HTML namespace, return to the step labeled loop.
1028 //
1029 // Otherwise, process the token according to the rules given in the section
1030 // corresponding to the current insertion mode in HTML content.
1031 Token::EndTag { tag_name, .. } => {
1032 let mut node = self.open_elements_stack.items.last();
1033 let mut stack_idx = self.open_elements_stack.items.len() - 1;
1034
1035 if let Some(node) = &node {
1036 let node_tag_name = get_tag_name!(node);
1037
1038 if node_tag_name.to_ascii_lowercase() != **tag_name {
1039 if stack_idx == 0 {
1040 self.errors.push(Error::new(
1041 token_and_info.span,
1042 ErrorKind::StrayEndTag(tag_name.clone()),
1043 ));
1044 } else {
1045 self.errors.push(Error::new(
1046 token_and_info.span,
1047 ErrorKind::EndTagDidNotMatchCurrentOpenElement(
1048 tag_name.clone(),
1049 node_tag_name.into(),
1050 ),
1051 ));
1052 }
1053 }
1054 }
1055
1056 loop {
1057 if stack_idx == 0 || node.is_none() {
1058 return Ok(());
1059 }
1060
1061 let inner_node = node.unwrap();
1062
1063 match &inner_node.data {
1064 Data::Element {
1065 tag_name: node_tag_name,
1066 ..
1067 } if node_tag_name.to_ascii_lowercase() == **tag_name => {
1068 let clone = inner_node.clone();
1069 let popped = self.open_elements_stack.pop_until_node(&clone);
1070
1071 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
1072
1073 return Ok(());
1074 }
1075 _ => {}
1076 }
1077
1078 stack_idx -= 1;
1079 node = self.open_elements_stack.items.get(stack_idx);
1080
1081 if let Some(node) = node {
1082 if get_namespace!(node) == Namespace::HTML {
1083 break;
1084 }
1085 }
1086 }
1087
1088 self.process_token(token_and_info, None)?;
1089 }
1090 // EOF token is not reachable here
1091 _ => {
1092 unreachable!();
1093 }
1094 }
1095
1096 Ok(())
1097 }
1098
1099 fn process_token(
1100 &mut self,
1101 token_and_info: &mut TokenAndInfo,
1102 override_insertion_mode: Option<InsertionMode>,
1103 ) -> PResult<()> {
1104 let TokenAndInfo { token, .. } = &token_and_info;
1105 let insertion_mode = match override_insertion_mode {
1106 Some(insertion_mode) => insertion_mode,
1107 _ => self.insertion_mode.clone(),
1108 };
1109
/// Lets a non-void HTML element be written in self-closing form when the
/// `allow_self_closing` config flag is enabled.
///
/// When the flag is set and the start tag was self-closing, an end tag token
/// with the same tag name and the start tag's span is synthesized and passed
/// to `process_token` immediately. Otherwise nothing happens.
///
/// Both arguments are identifiers bound from the current start tag token:
/// `$is_self_closing` is dereferenced to a `bool`, `$tag_name` is copied with
/// `to_owned`.
macro_rules! maybe_allow_self_closing {
    ($is_self_closing: ident, $tag_name: ident) => {
        if *$is_self_closing && self.config.allow_self_closing {
            // The tag is synthesized, so it has no raw name and no attributes.
            let synthetic_end_tag = Token::EndTag {
                tag_name: $tag_name.to_owned(),
                raw_tag_name: None,
                is_self_closing: false,
                attributes: Vec::new(),
            };
            let mut synthetic = TokenAndInfo {
                span: token_and_info.span,
                acknowledged: false,
                token: synthetic_end_tag,
            };

            self.process_token(&mut synthetic, None)?;
        }
    };
}
1130
1131 match insertion_mode {
1132 // The "initial" insertion mode
1133 InsertionMode::Initial => {
// A Document object has an associated "parser cannot change the mode flag" (a
// boolean). It is initially false.
1136
1137 // When the user agent is to apply the rules for the "initial" insertion mode,
1138 // the user agent must handle the token as follows:
1139 match token {
1140 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
1141 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
1142 // SPACE
1143 //
1144 // Ignore the token.
1145 Token::Character {
1146 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
1147 ..
1148 } => {
1149 return Ok(());
1150 }
1151 // A comment token
1152 //
1153 // Insert a comment as the last child of the Document object.
1154 Token::Comment { .. } => {
1155 self.insert_comment_as_last_child_of_document(token_and_info)?;
1156 }
1157 // A DOCTYPE token
1158 //
1159 // If the DOCTYPE token's name is not "html", or the token's public identifier
1160 // is not missing, or the token's system identifier is neither missing nor
1161 // "about:legacy-compat", then there is a parse error.
1162 //
1163 // Append a DocumentType node to the Document node, with its name set to the
1164 // name given in the DOCTYPE token, or the empty string if the name was missing;
1165 // its public ID set to the public identifier given in the DOCTYPE token, or the
1166 // empty string if the public identifier was missing; and its system ID set to
1167 // the system identifier given in the DOCTYPE token, or the empty string if the
1168 // system identifier was missing.
1169 //
1170 // Then, if the document is not an iframe srcdoc document, and the parser cannot
1171 // change the mode flag is false, and the DOCTYPE token matches one of the
1172 // conditions in the following list, then set the Document to quirks mode:
1173 //
1174 // The force-quirks flag is set to on.
1175 //
1176 // The name is not "html".
1177 //
1178 // The public identifier is set to: "-//W3O//DTD W3 HTML Strict 3.0//EN//"
1179 //
1180 // The public identifier is set to: "-/W3C/DTD HTML 4.0 Transitional/EN"
1181 //
1182 // The public identifier is set to: "HTML"
1183 //
1184 // The system identifier is set to: "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"
1185 //
1186 // The public identifier starts with: "+//Silmaril//dtd html Pro v0r11
1187 // 19970101//"
1188 //
1189 // The public identifier starts with: "-//AS//DTD HTML 3.0 asWedit +
1190 // extensions//"
1191 //
1192 // The public identifier starts with: "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit +
1193 // extensions//"
1194 //
1195 // The public identifier starts with: "-//IETF//DTD HTML 2.0 Level 1//"
1196 //
1197 // The public identifier starts with: "-//IETF//DTD HTML 2.0 Level 2//"
1198 //
1199 // The public identifier starts with: "-//IETF//DTD HTML 2.0 Strict Level 1//"
1200 //
1201 // The public identifier starts with: "-//IETF//DTD HTML 2.0 Strict Level 2//"
1202 //
1203 // The public identifier starts with: "-//IETF//DTD HTML 2.0 Strict//"
1204 //
1205 // The public identifier starts with: "-//IETF//DTD HTML 2.0//"
1206 //
1207 // The public identifier starts with: "-//IETF//DTD HTML 2.1E//"
1208 //
1209 // The public identifier starts with: "-//IETF//DTD HTML 3.0//"
1210 //
1211 // The public identifier starts with: "-//IETF//DTD HTML 3.2 Final//"
1212 //
1213 // The public identifier starts with: "-//IETF//DTD HTML 3.2//"
1214 //
1215 // The public identifier starts with: "-//IETF//DTD HTML 3//"
1216 //
1217 // The public identifier starts with: "-//IETF//DTD HTML Level 0//"
1218 //
1219 // The public identifier starts with: "-//IETF//DTD HTML Level 1//"
1220 //
1221 // The public identifier starts with: "-//IETF//DTD HTML Level 2//"
1222 //
1223 // The public identifier starts with: "-//IETF//DTD HTML Level 3//"
1224 //
1225 // The public identifier starts with: "-//IETF//DTD HTML Strict Level 0//"
1226 //
1227 // The public identifier starts with: "-//IETF//DTD HTML Strict Level 1//"
1228 //
1229 // The public identifier starts with: "-//IETF//DTD HTML Strict Level 2//"
1230 //
1231 // The public identifier starts with: "-//IETF//DTD HTML Strict Level 3//"
1232 //
1233 // The public identifier starts with: "-//IETF//DTD HTML Strict//"
1234 //
1235 // The public identifier starts with: "-//IETF//DTD HTML//"
1236 //
1237 // The public identifier starts with: "-//Metrius//DTD Metrius Presentational//"
1238 //
1239 // The public identifier starts with: "-//Microsoft//DTD Internet Explorer 2.0
1240 // HTML Strict//"
1241 //
1242 // The public identifier starts with: "-//Microsoft//DTD Internet Explorer 2.0
1243 // HTML//"
1244 //
1245 // The public identifier starts with: "-//Microsoft//DTD Internet Explorer 2.0
1246 // Tables//"
1247 //
1248 // The public identifier starts with: "-//Microsoft//DTD Internet Explorer 3.0
1249 // HTML Strict//"
1250 //
1251 // The public identifier starts with: "-//Microsoft//DTD Internet Explorer 3.0
1252 // HTML//"
1253 //
1254 // The public identifier starts with: "-//Microsoft//DTD Internet Explorer 3.0
1255 // Tables//"
1256 //
1257 // The public identifier starts with: "-//Netscape Comm. Corp.//DTD HTML//"
1258 //
1259 // The public identifier starts with: "-//Netscape Comm. Corp.//DTD Strict
1260 // HTML//"
1261 //
1262 // The public identifier starts with: "-//O'Reilly and Associates//DTD HTML
1263 // 2.0//"
1264 //
1265 // The public identifier starts with: "-//O'Reilly and Associates//DTD HTML
1266 // Extended 1.0//"
1267 //
1268 // The public identifier starts with: "-//O'Reilly and Associates//DTD HTML
1269 // Extended Relaxed 1.0//"
1270 //
1271 // The public identifier starts with: "-//SQ//DTD HTML 2.0 HoTMetaL +
1272 // extensions//"
1273 //
1274 // The public identifier starts with: "-//SoftQuad Software//DTD HoTMetaL PRO
1275 // 6.0::19990601::extensions to HTML 4.0//"
1276 //
1277 // The public identifier starts with: "-//SoftQuad//DTD HoTMetaL PRO
1278 // 4.0::19971010::extensions to HTML 4.0//"
1279 //
1280 // The public identifier starts with: "-//Spyglass//DTD HTML 2.0 Extended//"
1281 //
1282 // The public identifier starts with: "-//Sun Microsystems Corp.//DTD HotJava
1283 // HTML//"
1284 //
1285 // The public identifier starts with: "-//Sun Microsystems Corp.//DTD HotJava
1286 // Strict HTML//"
1287 //
1288 // The public identifier starts with: "-//W3C//DTD HTML 3 1995-03-24//"
1289 //
1290 // The public identifier starts with: "-//W3C//DTD HTML 3.2 Draft//"
1291 //
1292 // The public identifier starts with: "-//W3C//DTD HTML 3.2 Final//"
1293 //
1294 // The public identifier starts with: "-//W3C//DTD HTML 3.2//"
1295 //
1296 // The public identifier starts with: "-//W3C//DTD HTML 3.2S Draft//"
1297 //
1298 // The public identifier starts with: "-//W3C//DTD HTML 4.0 Frameset//"
1299 //
1300 // The public identifier starts with: "-//W3C//DTD HTML 4.0 Transitional//"
1301 //
1302 // The public identifier starts with: "-//W3C//DTD HTML Experimental 19960712//"
1303 //
1304 // The public identifier starts with: "-//W3C//DTD HTML Experimental 970421//"
1305 //
1306 // The public identifier starts with: "-//W3C//DTD W3 HTML//"
1307 //
1308 // The public identifier starts with: "-//W3O//DTD W3 HTML 3.0//"
1309 //
1310 // The public identifier starts with: "-//WebTechs//DTD Mozilla HTML 2.0//"
1311 //
1312 // The public identifier starts with: "-//WebTechs//DTD Mozilla HTML//"
1313 //
1314 // The system identifier is missing and the public identifier starts with:
1315 // "-//W3C//DTD HTML 4.01 Frameset//"
1316 //
1317 // The system identifier is missing and the public identifier starts with:
1318 // "-//W3C//DTD HTML 4.01 Transitional//"
1319 //
1320 // Otherwise, if the document is not an iframe srcdoc document, and the parser
1321 // cannot change the mode flag is false, and the DOCTYPE token matches one of
// the conditions in the following list, then set the Document to
1323 // limited-quirks mode:
1324 //
1325 // The public identifier starts with: "-//W3C//DTD XHTML 1.0 Frameset//"
1326 //
1327 // The public identifier starts with: "-//W3C//DTD XHTML 1.0 Transitional//"
1328 //
1329 // The system identifier is not missing and the public identifier starts with:
1330 // "-//W3C//DTD HTML 4.01 Frameset//"
1331 //
1332 // The system identifier is not missing and the public identifier starts with:
1333 // "-//W3C//DTD HTML 4.01 Transitional//"
1334 //
1335 // The system identifier and public identifier strings must be compared to the
1336 // values given in the lists above in an ASCII case-insensitive manner. A system
1337 // identifier whose value is the empty string is not considered missing for the
1338 // purposes of the conditions above.
1339 //
1340 // Then, switch the insertion mode to "before html".
1341 Token::Doctype {
1342 name,
1343 public_id,
1344 system_id,
1345 force_quirks,
1346 raw,
1347 ..
1348 } => {
1349 let is_html_name =
1350 matches!(name, Some(name) if name.eq_ignore_ascii_case("html"));
1351 let is_conforming_doctype = is_html_name
1352 && public_id.is_none()
1353 && (system_id.is_none()
1354 || matches!(system_id, Some(system_id) if system_id == "about:legacy-compat"));
1355
1356 if !is_conforming_doctype {
1357 self.errors.push(Error::new(
1358 token_and_info.span,
1359 ErrorKind::NonConformingDoctype,
1360 ));
1361 }
1362
1363 let document_type = Node::new(
1364 Data::DocumentType {
1365 name: name.clone(),
1366 public_id: public_id.clone(),
1367 system_id: system_id.clone(),
1368 raw: raw.clone(),
1369 },
1370 token_and_info.span,
1371 );
1372
1373 self.append_node(self.document.as_ref().unwrap(), document_type);
1374
1375 if !self.config.iframe_srcdoc
1376 && (*force_quirks
1377 || !is_html_name
1378 || matches!(public_id, Some(public_id) if QUIRKY_PUBLIC_MATCHES
1379 .contains(&&*public_id.to_ascii_lowercase()) || QUIRKY_PUBLIC_PREFIXES.contains(&&*public_id.to_ascii_lowercase()))
1380 || matches!(system_id, Some(system_id) if QUIRKY_SYSTEM_MATCHES
1381 .contains(&&*system_id.to_ascii_lowercase()) || HTML4_PUBLIC_PREFIXES.contains(
1382 &&*system_id.to_ascii_lowercase()
1383 )))
1384 {
1385 self.set_document_mode(DocumentMode::Quirks);
1386 } else if let Some(public_id) = public_id {
1387 if LIMITED_QUIRKY_PUBLIC_PREFIXES
1388 .contains(&&*public_id.as_ref().to_ascii_lowercase())
1389 {
1390 self.set_document_mode(DocumentMode::Quirks);
1391 }
1392 } else if let Some(system_id) = system_id {
1393 if HTML4_PUBLIC_PREFIXES
1394 .contains(&&*system_id.as_ref().to_ascii_lowercase())
1395 {
1396 self.set_document_mode(DocumentMode::Quirks);
1397 }
1398 }
1399
1400 self.insertion_mode = InsertionMode::BeforeHtml;
1401 }
1402 // Anything else
1403 //
1404 // If the document is not an iframe srcdoc document, then this is a parse error;
1405 // if the parser cannot change the mode flag is false, set the Document to
1406 // quirks mode.
1407 //
1408 // In any case, switch the insertion mode to "before html", then reprocess the
1409 // token.
1410 _ => {
1411 if !self.config.iframe_srcdoc {
1412 match &token {
1413 Token::StartTag { .. } => {
1414 self.errors.push(Error::new(
1415 token_and_info.span,
1416 ErrorKind::StartTagWithoutDoctype,
1417 ));
1418 }
1419 Token::EndTag { .. } => {
1420 self.errors.push(Error::new(
1421 token_and_info.span,
1422 ErrorKind::EndTagSeenWithoutDoctype,
1423 ));
1424 }
1425 Token::Character { .. } => {
1426 self.errors.push(Error::new(
1427 token_and_info.span,
1428 ErrorKind::NonSpaceCharacterWithoutDoctype,
1429 ));
1430 }
1431 Token::Eof => {
1432 self.errors.push(Error::new(
1433 token_and_info.span,
1434 ErrorKind::EofWithoutDoctype,
1435 ));
1436 }
1437 _ => {
1438 unreachable!();
1439 }
1440 }
1441
1442 self.set_document_mode(DocumentMode::Quirks);
1443 }
1444
1445 self.insertion_mode = InsertionMode::BeforeHtml;
1446 self.process_token(token_and_info, None)?;
1447 }
1448 }
1449 }
1450 // The "before html" insertion mode
1451 InsertionMode::BeforeHtml => {
1452 let anything_else =
1453 |parser: &mut Parser<I>, token_and_info: &mut TokenAndInfo| -> PResult<()> {
1454 let element = parser.create_fake_html_element();
1455
1456 parser.open_elements_stack.push(element.clone());
1457
1458 // Never be `None`
1459 if let Some(document) = &parser.document {
1460 parser.append_node(document, element);
1461 }
1462
1463 parser.insertion_mode = InsertionMode::BeforeHead;
1464 parser.process_token(token_and_info, None)?;
1465
1466 Ok(())
1467 };
1468
1469 // When the user agent is to apply the rules for the "before html" insertion
1470 // mode, the user agent must handle the token as follows:
1471 match token {
1472 // A DOCTYPE token
1473 //
1474 // Parse error. Ignore the token.
1475 Token::Doctype { .. } => {
1476 self.errors
1477 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
1478 }
1479 // A comment token
1480 //
1481 // Insert a comment as the last child of the Document object.
1482 Token::Comment { .. } => {
1483 self.insert_comment_as_last_child_of_document(token_and_info)?;
1484 }
1485 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
1486 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
1487 // SPACE
1488 //
1489 // Ignore the token.
1490 Token::Character {
1491 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
1492 ..
1493 } => {
1494 return Ok(());
1495 }
1496 // A start tag whose tag name is "html"
1497 //
1498 // Create an element for the token in the HTML namespace, with the Document as
1499 // the intended parent. Append it to the Document object. Put this element in
1500 // the stack of open elements.
1501 //
1502 // Switch the insertion mode to "before head".
1503 Token::StartTag {
1504 tag_name,
1505 attributes,
1506 is_self_closing,
1507 ..
1508 } if tag_name == "html" => {
1509 let element = Node::new(
1510 Data::Element {
1511 namespace: Namespace::HTML,
1512 tag_name: tag_name.clone(),
1513 attributes: RefCell::new(
1514 attributes
1515 .iter()
1516 .map(|token_attribute| Attribute {
1517 span: token_attribute.span,
1518 namespace: None,
1519 prefix: None,
1520 name: token_attribute.name.clone(),
1521 raw_name: token_attribute.raw_name.clone(),
1522 value: token_attribute.value.clone(),
1523 raw_value: token_attribute.raw_value.clone(),
1524 })
1525 .collect(),
1526 ),
1527 is_self_closing: *is_self_closing,
1528 },
1529 token_and_info.span,
1530 );
1531
1532 self.open_elements_stack.push(element.clone());
1533
// The document is expected to never be `None` here
1535 if let Some(document) = &self.document {
1536 self.append_node(document, element);
1537 }
1538
1539 self.insertion_mode = InsertionMode::BeforeHead;
1540 }
1541 // An end tag whose tag name is one of: "head", "body", "html", "br"
1542 //
1543 // Act as described in the "anything else" entry below.
1544 Token::EndTag { tag_name, .. }
1545 if matches!(&**tag_name, "head" | "body" | "html" | "br") =>
1546 {
1547 anything_else(self, token_and_info)?;
1548 }
1549 // Any other end tag
1550 //
1551 // Parse error. Ignore the token.
1552 Token::EndTag { tag_name, .. } => {
1553 self.errors.push(Error::new(
1554 token_and_info.span,
1555 ErrorKind::StrayEndTag(tag_name.clone()),
1556 ));
1557 }
1558 // Anything else
1559 //
1560 // Create an html element whose node document is the Document object. Append it
1561 // to the Document object. Put this element in the stack of open elements.
1562 //
1563 // Switch the insertion mode to "before head", then reprocess the token.
1564 _ => {
1565 anything_else(self, token_and_info)?;
1566 }
1567 }
1568
1569 // The document element can end up being removed from the
1570 // Document object, e.g. by scripts; nothing in particular
1571 // happens in such cases, content continues being appended to
1572 // the nodes as described in the next section.
1573 }
1574 // The "before head" insertion mode
1575 InsertionMode::BeforeHead => {
1576 let anything_else = |parser: &mut Parser<I>,
1577 token_and_info: &mut TokenAndInfo|
1578 -> PResult<()> {
1579 let element = parser
1580 .insert_html_element(&parser.create_fake_token_and_info("head", None))?;
1581
1582 parser.head_element_pointer = Some(element);
1583 parser.insertion_mode = InsertionMode::InHead;
1584 parser.process_token(token_and_info, None)?;
1585
1586 Ok(())
1587 };
1588
1589 // When the user agent is to apply the rules for the "before head" insertion
1590 // mode, the user agent must handle the token as follows:
1591 match token {
1592 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
1593 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
1594 // SPACE
1595 //
1596 // Ignore the token.
1597 Token::Character {
1598 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
1599 ..
1600 } => {
1601 return Ok(());
1602 }
1603 // A comment token
1604 //
1605 // Insert a comment.
1606 Token::Comment { .. } => {
1607 self.insert_comment(token_and_info)?;
1608 }
1609 // A DOCTYPE token
1610 //
1611 // Parse error. Ignore the token.
1612 Token::Doctype { .. } => {
1613 self.errors
1614 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
1615 }
1616 // A start tag whose tag name is "html"
1617 //
1618 // Process the token using the rules for the "in body" insertion mode.
1619 Token::StartTag { tag_name, .. } if tag_name == "html" => {
1620 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
1621 }
1622 // A start tag whose tag name is "head"
1623 //
1624 // Insert an HTML element for the token.
1625 //
1626 // Set the head element pointer to the newly created head element.
1627 //
1628 // Switch the insertion mode to "in head".
1629 Token::StartTag { tag_name, .. } if tag_name == "head" => {
1630 let element = self.insert_html_element(token_and_info)?;
1631
1632 self.head_element_pointer = Some(element);
1633 self.insertion_mode = InsertionMode::InHead;
1634 }
1635 // An end tag whose tag name is one of: "head", "body", "html", "br"
1636 //
1637 // Act as described in the "anything else" entry below.
1638 Token::EndTag { tag_name, .. }
1639 if matches!(&**tag_name, "head" | "body" | "html" | "br") =>
1640 {
1641 anything_else(self, token_and_info)?;
1642 }
1643 // Any other end tag
1644 //
1645 // Parse error. Ignore the token.
1646 Token::EndTag { tag_name, .. } => {
1647 self.errors.push(Error::new(
1648 token_and_info.span,
1649 ErrorKind::StrayEndTag(tag_name.clone()),
1650 ));
1651 }
1652 // Anything else
1653 //
1654 // Insert an HTML element for a "head" start tag token with no attributes.
1655 //
1656 // Set the head element pointer to the newly created head element.
1657 //
1658 // Switch the insertion mode to "in head".
1659 //
1660 // Reprocess the current token.
1661 _ => {
1662 anything_else(self, token_and_info)?;
1663 }
1664 }
1665 }
1666 // The "in head" insertion mode
1667 InsertionMode::InHead => {
1668 let anything_else =
1669 |parser: &mut Parser<I>, token_and_info: &mut TokenAndInfo| -> PResult<()> {
1670 parser.open_elements_stack.pop();
1671 parser.insertion_mode = InsertionMode::AfterHead;
1672 parser.process_token(token_and_info, None)?;
1673
1674 Ok(())
1675 };
1676
1677 // When the user agent is to apply the rules for the "in head" insertion mode,
1678 // the user agent must handle the token as follows:
1679 match token {
1680 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
1681 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
1682 // SPACE
1683 //
1684 // Insert the character.
1685 Token::Character {
1686 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
1687 ..
1688 } => {
1689 self.insert_character(token_and_info)?;
1690 }
1691 // A comment token
1692 //
1693 // Insert a comment.
1694 Token::Comment { .. } => {
1695 self.insert_comment(token_and_info)?;
1696 }
1697 // A DOCTYPE token
1698 //
1699 // Parse error. Ignore the token.
1700 Token::Doctype { .. } => {
1701 self.errors
1702 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
1703 }
1704 // A start tag whose tag name is "html"
1705 //
1706 // Process the token using the rules for the "in body" insertion mode.
1707 Token::StartTag { tag_name, .. } if tag_name == "html" => {
1708 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
1709 }
1710 // A start tag whose tag name is one of: "base", "basefont", "bgsound", "link"
1711 //
1712 // Insert an HTML element for the token. Immediately pop the current node off
1713 // the stack of open elements.
1714 //
1715 // Acknowledge the token's self-closing flag, if it is set.
1716 Token::StartTag {
1717 tag_name,
1718 is_self_closing,
1719 ..
1720 } if matches!(&**tag_name, "base" | "basefont" | "bgsound" | "link") => {
1721 let is_self_closing = *is_self_closing;
1722
1723 self.insert_html_element(token_and_info)?;
1724 self.open_elements_stack.pop();
1725
1726 if is_self_closing {
1727 token_and_info.acknowledged = true;
1728 }
1729 }
1730 // A start tag whose tag name is "meta"
1731 //
1732 // Insert an HTML element for the token. Immediately pop the current node off
1733 // the stack of open elements.
1734 //
1735 // Acknowledge the token's self-closing flag, if it is set.
1736 //
1737 // If the active speculative HTML parser is null, then:
1738 //
1739 // If the element has a charset attribute, and getting an encoding from its
1740 // value results in an encoding, and the confidence is currently tentative, then
1741 // change the encoding to the resulting encoding.
1742 //
1743 // Otherwise, if the element has an http-equiv attribute whose value is an ASCII
1744 // case-insensitive match for the string "Content-Type", and the element has a
1745 // content attribute, and applying the algorithm for extracting a character
1746 // encoding from a meta element to that attribute's value returns an encoding,
1747 // and the confidence is currently tentative, then change the encoding to the
1748 // extracted encoding.
1749 Token::StartTag {
1750 tag_name,
1751 is_self_closing,
1752 ..
1753 } if tag_name == "meta" => {
1754 let is_self_closing = *is_self_closing;
1755
1756 self.insert_html_element(token_and_info)?;
1757 self.open_elements_stack.pop();
1758
1759 if is_self_closing {
1760 token_and_info.acknowledged = true;
1761 }
1762 }
1763 // A start tag whose tag name is "title"
1764 //
1765 // Follow the generic RCDATA element parsing algorithm.
1766 Token::StartTag { tag_name, .. } if tag_name == "title" => {
1767 self.parse_generic_text_element(token_and_info, false)?;
1768 }
1769 // A start tag whose tag name is "noscript", if the scripting flag is enabled
1770 // A start tag whose tag name is one of: "noframes", "style"
1771 //
1772 // Follow the generic raw text element parsing algorithm.
1773 Token::StartTag { tag_name, .. }
1774 if tag_name == "noscript" && self.config.scripting_enabled =>
1775 {
1776 self.parse_generic_text_element(token_and_info, true)?;
1777 }
1778 Token::StartTag { tag_name, .. }
1779 if matches!(&**tag_name, "noframes" | "style") =>
1780 {
1781 self.parse_generic_text_element(token_and_info, true)?;
1782 }
1783 // A start tag whose tag name is "noscript", if the scripting flag is disabled
1784 //
1785 // Insert an HTML element for the token.
1786 //
1787 // Switch the insertion mode to "in head noscript".
1788 Token::StartTag { tag_name, .. }
1789 if tag_name == "noscript" && !self.config.scripting_enabled =>
1790 {
1791 self.insert_html_element(token_and_info)?;
1792 self.insertion_mode = InsertionMode::InHeadNoScript;
1793 }
1794 // A start tag whose tag name is "script"
1795 //
1796 // Run these steps:
1797 //
1798 // 1. Let the adjusted insertion location be the appropriate place for inserting
1799 // a node.
1800 //
1801 // 2. Create an element for the token in the HTML namespace, with the intended
1802 // parent being the element in which the adjusted insertion location finds
1803 // itself.
1804 //
1805 // 3. Set the element's parser document to the Document, and unset the element's
1806 // "non-blocking" flag.
1807 //
1808 // This ensures that, if the script is external, any document.write() calls in
1809 // the script will execute in-line, instead of blowing the document away, as
1810 // would happen in most other cases. It also prevents the script from executing
1811 // until the end tag is seen.
1812 //
1813 // 4. If the parser was created as part of the HTML fragment parsing algorithm,
1814 // then mark the script element as "already started". (fragment case)
1815 //
1816 // 5. If the parser was invoked via the document.write() or document.writeln()
1817 // methods, then optionally mark the script element as "already started". (For
1818 // example, the user agent might use this clause to prevent execution of
1819 // cross-origin scripts inserted via document.write() under slow network
1820 // conditions, or when the page has already taken a long time to load.)
1821 //
1822 // 6. Insert the newly created element at the adjusted insertion location.
1823 //
1824 // 7. Push the element onto the stack of open elements so that it is the new
1825 // current node.
1826 //
1827 // 8. Switch the tokenizer to the script data state.
1828 //
1829 // 9. Let the original insertion mode be the current insertion mode.
1830 //
1831 // 10. Switch the insertion mode to "text".
1832 Token::StartTag {
1833 tag_name,
1834 is_self_closing,
1835 ..
1836 } if tag_name == "script" => {
1837 let adjusted_insertion_location =
1838 self.get_appropriate_place_for_inserting_node(None)?;
1839 let node = self.create_element_for_token(
1840 token_and_info.token.clone(),
1841 token_and_info.span,
1842 Some(Namespace::HTML),
1843 None,
1844 );
1845
1846 // Skip script handling
1847
1848 self.insert_at_position(adjusted_insertion_location, node.clone());
1849 self.open_elements_stack.push(node);
1850 self.input.set_input_state(State::ScriptData);
1851 self.original_insertion_mode = self.insertion_mode.clone();
1852 self.insertion_mode = InsertionMode::Text;
1853 maybe_allow_self_closing!(is_self_closing, tag_name);
1854 }
1855 // An end tag whose tag name is "head"
1856 //
1857 // Pop the current node (which will be the head element) off the stack of open
1858 // elements.
1859 //
1860 // Switch the insertion mode to "after head".
1861 Token::EndTag { tag_name, .. } if tag_name == "head" => {
1862 let popped = self.open_elements_stack.pop();
1863
1864 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
1865 self.insertion_mode = InsertionMode::AfterHead;
1866 }
1867 // An end tag whose tag name is one of: "body", "html", "br"
1868 //
1869 // Act as described in the "anything else" entry below.
1870 Token::EndTag { tag_name, .. }
1871 if matches!(&**tag_name, "body" | "html" | "br") =>
1872 {
1873 anything_else(self, token_and_info)?;
1874 }
1875 // A start tag whose tag name is "template"
1876 //
1877 // Insert an HTML element for the token.
1878 //
1879 // Insert a marker at the end of the list of active formatting elements.
1880 //
1881 // Set the frameset-ok flag to "not ok".
1882 //
1883 // Switch the insertion mode to "in template".
1884 //
1885 // Push "in template" onto the stack of template insertion modes so that it is
1886 // the new current template insertion mode.
1887 Token::StartTag {
1888 tag_name,
1889 is_self_closing,
1890 ..
1891 } if tag_name == "template" => {
1892 self.insert_html_element(token_and_info)?;
1893 self.active_formatting_elements.insert_marker();
1894 self.frameset_ok = false;
1895 self.insertion_mode = InsertionMode::InTemplate;
1896 self.template_insertion_mode_stack
1897 .push(InsertionMode::InTemplate);
1898 maybe_allow_self_closing!(is_self_closing, tag_name);
1899 }
1900 // An end tag whose tag name is "template"
1901 //
1902 // If there is no template element on the stack of open elements, then this is a
1903 // parse error; ignore the token.
1904 //
1905 // Otherwise, run these steps:
1906 //
1907 // Generate all implied end tags thoroughly.
1908 //
1909 // If the current node is not a template element, then this is a parse error.
1910 //
1911 // Pop elements from the stack of open elements until a template element has
1912 // been popped from the stack.
1913 //
1914 // Clear the list of active formatting elements up to the last marker.
1915 // Pop the current template insertion mode off the stack of template insertion
1916 // modes.
1917 //
1918 // Reset the insertion mode appropriately.
1919 Token::EndTag { tag_name, .. } if tag_name == "template" => {
1920 if !self.open_elements_stack.contains_template_element() {
1921 self.errors.push(Error::new(
1922 token_and_info.span,
1923 ErrorKind::StrayEndTag(tag_name.clone()),
1924 ));
1925 } else {
1926 self.open_elements_stack
1927 .generate_implied_end_tags_thoroughly();
1928
1929 match self.open_elements_stack.items.last() {
1930 Some(node) if !is_html_element!(node, "template") => {
1931 self.errors.push(Error::new(
1932 token_and_info.span,
1933 ErrorKind::UnclosedElements(tag_name.clone()),
1934 ));
1935 }
1936 _ => {}
1937 }
1938
1939 let popped = self
1940 .open_elements_stack
1941 .pop_until_tag_name_popped(&["template"]);
1942
1943 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
1944 self.active_formatting_elements.clear_to_last_marker();
1945 self.template_insertion_mode_stack.pop();
1946 self.reset_insertion_mode();
1947 }
1948 }
1949 // A start tag whose tag name is "head"
1950 //
1951 // Any other end tag
1952 //
1953 // Parse error. Ignore the token.
1954 Token::StartTag { tag_name, .. } if tag_name == "head" => {
1955 self.errors.push(Error::new(
1956 token_and_info.span,
1957 ErrorKind::SomethingSeenWhenSomethingOpen(tag_name.clone()),
1958 ));
1959 }
1960 Token::EndTag { tag_name, .. } => {
1961 self.errors.push(Error::new(
1962 token_and_info.span,
1963 ErrorKind::StrayEndTag(tag_name.clone()),
1964 ));
1965 }
1966 // Anything else
1967 //
1968 // Pop the current node (which will be the head element) off the stack of open
1969 // elements.
1970 //
1971 // Switch the insertion mode to "after head".
1972 //
1973 // Reprocess the token.
1974 _ => {
1975 anything_else(self, token_and_info)?;
1976 }
1977 }
1978 }
1979 // The "in head noscript" insertion mode
1980 InsertionMode::InHeadNoScript => {
1981 let anything_else =
1982 |parser: &mut Parser<I>, token_and_info: &mut TokenAndInfo| -> PResult<()> {
1983 match &token_and_info.token {
1984 Token::Character { .. } => {
1985 parser.errors.push(Error::new(
1986 token_and_info.span,
1987 ErrorKind::NonSpaceCharacterInNoscriptInHead,
1988 ));
1989 }
1990 Token::StartTag { tag_name, .. } => {
1991 parser.errors.push(Error::new(
1992 token_and_info.span,
1993 ErrorKind::BadStartTagInNoscriptInHead(tag_name.clone()),
1994 ));
1995 }
1996 Token::EndTag { tag_name, .. } => {
1997 parser.errors.push(Error::new(
1998 token_and_info.span,
1999 ErrorKind::StrayEndTag(tag_name.clone()),
2000 ));
2001 }
2002 Token::Eof => {
2003 parser.errors.push(Error::new(
2004 token_and_info.span,
2005 ErrorKind::EofWithUnclosedElements,
2006 ));
2007 }
2008 _ => {
2009 unreachable!()
2010 }
2011 }
2012
2013 parser.open_elements_stack.pop();
2014 parser.insertion_mode = InsertionMode::InHead;
2015 parser.process_token(token_and_info, None)?;
2016
2017 Ok(())
2018 };
2019
2020 // When the user agent is to apply the rules for the "in head noscript"
2021 // insertion mode, the user agent must handle the token as follows:
2022 match token {
2023 // A DOCTYPE token
2024 //
2025 // Parse error. Ignore the token.
2026 Token::Doctype { .. } => {
2027 self.errors
2028 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
2029 }
2030 // A start tag whose tag name is "html"
2031 //
2032 // Process the token using the rules for the "in body" insertion mode.
2033 Token::StartTag { tag_name, .. } if tag_name == "html" => {
2034 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
2035 }
2036 // An end tag whose tag name is "noscript"
2037 //
2038 // Pop the current node (which will be a noscript element) from the stack of
2039 // open elements; the new current node will be a head element.
2040 //
2041 // Switch the insertion mode to "in head".
2042 Token::EndTag { tag_name, .. } if tag_name == "noscript" => {
2043 let popped = self.open_elements_stack.pop();
2044
2045 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
2046 self.insertion_mode = InsertionMode::InHead;
2047 }
2048 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
2049 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
2050 // SPACE
2051 //
2052 // A comment token
2053 //
2054 // A start tag whose tag name is one of: "basefont", "bgsound", "link", "meta",
2055 // "noframes", "style"
2056 //
2057 // Process the token using the rules for the "in head" insertion mode.
2058 Token::Character {
2059 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
2060 ..
2061 } => {
2062 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
2063 }
2064 Token::Comment { .. } => {
2065 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
2066 }
2067 Token::StartTag { tag_name, .. }
2068 if matches!(
2069 &**tag_name,
2070 "basefont" | "bgsound" | "link" | "meta" | "noframes" | "style"
2071 ) =>
2072 {
2073 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
2074 }
2075 // An end tag whose tag name is "br"
2076 //
2077 // Act as described in the "anything else" entry below.
2078 Token::EndTag { tag_name, .. } if tag_name == "br" => {
2079 anything_else(self, token_and_info)?;
2080 }
2081 // A start tag whose tag name is one of: "head", "noscript"
2082 //
2083 // Any other end tag
2084 //
2085 // Parse error. Ignore the token.
2086 Token::StartTag { tag_name, .. }
2087 if matches!(&**tag_name, "head" | "noscript") =>
2088 {
2089 self.errors.push(Error::new(
2090 token_and_info.span,
2091 ErrorKind::SomethingSeenWhenSomethingOpen(tag_name.clone()),
2092 ));
2093 }
2094 Token::EndTag { tag_name, .. } => {
2095 self.errors.push(Error::new(
2096 token_and_info.span,
2097 ErrorKind::StrayEndTag(tag_name.clone()),
2098 ));
2099 }
2100 // Anything else
2101 //
2102 // Parse error.
2103 //
2104 // Pop the current node (which will be a noscript element) from the stack of
2105 // open elements; the new current node will be a head element.
2106 //
2107 // Switch the insertion mode to "in head".
2108 //
2109 // Reprocess the token.
2110 _ => {
2111 anything_else(self, token_and_info)?;
2112 }
2113 }
2114 }
2115 // The "after head" insertion mode
2116 InsertionMode::AfterHead => {
2117 let anything_else = |parser: &mut Parser<I>,
2118 token_and_info: &mut TokenAndInfo|
2119 -> PResult<()> {
2120 let span = if matches!(&token_and_info.token, Token::EndTag { tag_name, .. } if tag_name == "body")
2121 {
2122 Some(token_and_info.span)
2123 } else {
2124 None
2125 };
2126 let body_token = parser.create_fake_token_and_info("body", span);
2127
2128 parser.insert_html_element(&body_token)?;
2129 parser.insertion_mode = InsertionMode::InBody;
2130 parser.process_token(token_and_info, None)?;
2131
2132 Ok(())
2133 };
2134 // When the user agent is to apply the rules for the "after head" insertion
2135 // mode, the user agent must handle the token as follows:
2136 match token {
2137 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
2138 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
2139 // SPACE
2140 //
2141 // Insert the character.
2142 Token::Character {
2143 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
2144 ..
2145 } => {
2146 self.insert_character(token_and_info)?;
2147 }
2148 // A comment token
2149 //
2150 // Insert a comment.
2151 Token::Comment { .. } => {
2152 self.insert_comment(token_and_info)?;
2153 }
2154 // A DOCTYPE token
2155 //
2156 // Parse error. Ignore the token.
2157 Token::Doctype { .. } => {
2158 self.errors
2159 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
2160 }
2161 // A start tag whose tag name is "html"
2162 //
2163 // Process the token using the rules for the "in body" insertion mode.
2164 Token::StartTag { tag_name, .. } if tag_name == "html" => {
2165 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
2166 }
2167 // A start tag whose tag name is "body"
2168 //
2169 // Insert an HTML element for the token.
2170 //
2171 // Set the frameset-ok flag to "not ok".
2172 //
2173 // Switch the insertion mode to "in body".
2174 Token::StartTag {
2175 tag_name,
2176 is_self_closing,
2177 ..
2178 } if tag_name == "body" => {
2179 self.insert_html_element(token_and_info)?;
2180 self.frameset_ok = false;
2181 self.insertion_mode = InsertionMode::InBody;
2182 maybe_allow_self_closing!(is_self_closing, tag_name);
2183 }
2184 // A start tag whose tag name is "frameset"
2185 //
2186 // Insert an HTML element for the token.
2187 //
2188 // Switch the insertion mode to "in frameset".
2189 Token::StartTag {
2190 tag_name,
2191 is_self_closing,
2192 ..
2193 } if tag_name == "frameset" => {
2194 self.insert_html_element(token_and_info)?;
2195 self.insertion_mode = InsertionMode::InFrameset;
2196 maybe_allow_self_closing!(is_self_closing, tag_name);
2197 }
2198 // A start tag whose tag name is one of: "base", "basefont", "bgsound", "link",
2199 // "meta", "noframes", "script", "style", "template", "title"
2200 //
2201 // Parse error.
2202 //
2203 // Push the node pointed to by the head element pointer onto the stack of open
2204 // elements.
2205 //
2206 // Process the token using the rules for the "in head" insertion mode.
2207 //
2208 // Remove the node pointed to by the head element pointer from the stack of open
2209 // elements. (It might not be the current node at this point.)
2210 Token::StartTag { tag_name, .. }
2211 if matches!(
2212 &**tag_name,
2213 "base"
2214 | "basefont"
2215 | "bgsound"
2216 | "link"
2217 | "meta"
2218 | "noframes"
2219 | "script"
2220 | "style"
2221 | "template"
2222 | "title"
2223 ) =>
2224 {
2225 self.errors.push(Error::new(
2226 token_and_info.span,
2227 ErrorKind::SomethingBetweenHeadAndBody(tag_name.clone()),
2228 ));
2229
2230 let head = self
2231 .head_element_pointer
2232 .as_ref()
2233 .expect("no head element")
2234 .clone();
2235
2236 self.open_elements_stack.push(head.clone());
2237 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
2238 self.open_elements_stack.remove(&head);
2239 }
2240 // An end tag whose tag name is "template"
2241 //
2242 // Process the token using the rules for the "in head" insertion mode.
2243 Token::EndTag { tag_name, .. } if tag_name == "template" => {
2244 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
2245 }
2246 // An end tag whose tag name is one of: "body", "html", "br"
2247 //
2248 // Act as described in the "anything else" entry below.
2249 Token::EndTag { tag_name, .. }
2250 if matches!(&**tag_name, "body" | "html" | "br") =>
2251 {
2252 anything_else(self, token_and_info)?;
2253 }
2254 // A start tag whose tag name is "head"
2255 //
2256 // Any other end tag
2257 //
2258 // Parse error. Ignore the token.
2259 Token::StartTag { tag_name, .. } if tag_name == "head" => {
2260 self.errors.push(Error::new(
2261 token_and_info.span,
2262 ErrorKind::StrayStartTag(tag_name.clone()),
2263 ));
2264 }
2265 Token::EndTag { tag_name, .. } => {
2266 self.errors.push(Error::new(
2267 token_and_info.span,
2268 ErrorKind::StrayEndTag(tag_name.clone()),
2269 ));
2270 }
2271 // Anything else
2272 //
2273 // Insert an HTML element for a "body" start tag token with no attributes.
2274 //
2275 // Switch the insertion mode to "in body".
2276 //
2277 // Reprocess the current token.
2278 _ => {
2279 anything_else(self, token_and_info)?;
2280 }
2281 }
2282 }
2283 // The "in body" insertion mode
2284 InsertionMode::InBody => {
2285 // When the user agent is to apply the rules for the "in body" insertion mode,
2286 // the user agent must handle the token as follows:
2287 match token {
2288 // A character token that is U+0000 NULL
2289 //
2290 // Parse error. Ignore the token.
2291 Token::Character { value, .. } if *value == '\x00' => self.errors.push(
2292 Error::new(token_and_info.span, ErrorKind::UnexpectedNullCharacter),
2293 ),
2294 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
2295 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
2296 // SPACE
2297 //
2298 // Reconstruct the active formatting elements, if any.
2299 //
2300 // Insert the token's character.
2301 Token::Character {
2302 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
2303 ..
2304 } => {
2305 self.reconstruct_active_formatting_elements()?;
2306 self.insert_character(token_and_info)?;
2307 }
2308 // Any other character token
2309 //
2310 // Reconstruct the active formatting elements, if any.
2311 //
2312 // Insert the token's character.
2313 //
2314 // Set the frameset-ok flag to "not ok".
2315 Token::Character { .. } => {
2316 self.reconstruct_active_formatting_elements()?;
2317 self.insert_character(token_and_info)?;
2318 self.frameset_ok = false;
2319 }
2320 // A comment token
2321 //
2322 // Insert a comment.
2323 Token::Comment { .. } => {
2324 self.insert_comment(token_and_info)?;
2325 }
2326 // A DOCTYPE token
2327 //
2328 // Parse error. Ignore the token.
2329 Token::Doctype { .. } => {
2330 self.errors
2331 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
2332 }
2333 // A start tag whose tag name is "html"
2334 //
2335 // Parse error.
2336 //
2337 // If there is a template element on the stack of open elements, then ignore the
2338 // token.
2339 //
2340 // Otherwise, for each attribute on the token, check to see if the attribute is
2341 // already present on the top element of the stack of open elements. If it is
2342 // not, add the attribute and its corresponding value to that element.
2343 Token::StartTag {
2344 tag_name,
2345 attributes,
2346 ..
2347 } if tag_name == "html" => {
2348 self.errors.push(Error::new(
2349 token_and_info.span,
2350 ErrorKind::StrayStartTag(tag_name.clone()),
2351 ));
2352
2353 if self.open_elements_stack.contains_template_element() {
2354 // Ignore
2355 return Ok(());
2356 }
2357
2358 if let Some(top) = self.open_elements_stack.items.first() {
2359 let mut node_attributes = match &top.data {
2360 Data::Element { attributes, .. } => attributes.borrow_mut(),
2361 _ => {
2362 unreachable!();
2363 }
2364 };
2365
2366 for token_attribute in attributes {
2367 let mut found = false;
2368
2369 for attribute in node_attributes.iter() {
2370 if attribute.name == token_attribute.name {
2371 found = true;
2372
2373 break;
2374 }
2375 }
2376
2377 if !found {
2378 node_attributes.push(Attribute {
2379 span: token_attribute.span,
2380 namespace: None,
2381 prefix: None,
2382 name: token_attribute.name.clone(),
2383 raw_name: token_attribute.raw_name.clone(),
2384 value: token_attribute.value.clone(),
2385 raw_value: token_attribute.raw_value.clone(),
2386 });
2387 }
2388 }
2389 }
2390 }
2391 // A start tag whose tag name is one of: "base", "basefont", "bgsound", "link",
2392 // "meta", "noframes", "script", "style", "template", "title"
2393 //
2394 // An end tag whose tag name is "template"
2395 //
2396 // Process the token using the rules for the "in head" insertion mode.
2397 Token::StartTag { tag_name, .. }
2398 if matches!(
2399 &**tag_name,
2400 "base"
2401 | "basefont"
2402 | "bgsound"
2403 | "link"
2404 | "meta"
2405 | "noframes"
2406 | "script"
2407 | "style"
2408 | "template"
2409 | "title"
2410 ) =>
2411 {
2412 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
2413 }
2414 Token::EndTag { tag_name, .. } if tag_name == "template" => {
2415 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
2416 }
2417 // A start tag whose tag name is "body"
2418 //
2419 // Parse error.
2420 //
2421 // If the second element on the stack of open elements is not a body element, if
2422 // the stack of open elements has only one node on it, or if there is a template
2423 // element on the stack of open elements, then ignore the token. (fragment case)
2424 //
2425 // Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute on
2426 // the token, check to see if the attribute is already present on the body
2427 // element (the second element) on the stack of open elements, and if it is not,
2428 // add the attribute and its corresponding value to that element.
2429 Token::StartTag {
2430 tag_name,
2431 attributes,
2432 ..
2433 } if tag_name == "body" => {
2434 self.errors.push(Error::new(
2435 token_and_info.span,
2436 ErrorKind::SomethingSeenWhenSomethingOpen(tag_name.clone()),
2437 ));
2438
2439 let is_second_body = matches!(self.open_elements_stack.items.get(1), Some(node) if is_html_element!(node, "body"));
2440
2441 if !is_second_body
2442 || self.open_elements_stack.items.len() == 1
2443 || self.open_elements_stack.contains_template_element()
2444 {
2445 // Ignore
2446 // Fragment case
2447 return Ok(());
2448 }
2449
2450 self.frameset_ok = false;
2451
2452 if let Some(top) = self.open_elements_stack.items.get(1) {
2453 let mut node_attributes = match &top.data {
2454 Data::Element { attributes, .. } => attributes.borrow_mut(),
2455 _ => {
2456 unreachable!();
2457 }
2458 };
2459
2460 for token_attribute in attributes {
2461 let mut found = false;
2462
2463 for attribute in node_attributes.iter() {
2464 if attribute.name == token_attribute.name {
2465 found = true;
2466
2467 break;
2468 }
2469 }
2470
2471 if !found {
2472 node_attributes.push(Attribute {
2473 span: token_attribute.span,
2474 namespace: None,
2475 prefix: None,
2476 name: token_attribute.name.clone(),
2477 raw_name: token_attribute.raw_name.clone(),
2478 value: token_attribute.value.clone(),
2479 raw_value: token_attribute.raw_value.clone(),
2480 });
2481 }
2482 }
2483 }
2484 }
2485 // A start tag whose tag name is "frameset"
2486 //
2487 // Parse error.
2488 //
2489 // If the stack of open elements has only one node on it, or if the second
2490 // element on the stack of open elements is not a body element, then ignore the
2491 // token. (fragment case)
2492 //
2493 // If the frameset-ok flag is set to "not ok", ignore the token.
2494 //
2495 // Otherwise, run the following steps:
2496 //
2497 // Remove the second element on the stack of open elements from its parent node,
2498 // if it has one.
2499 //
2500 // Pop all the nodes from the bottom of the stack of open elements, from the
2501 // current node up to, but not including, the root html element.
2502 //
2503 // Insert an HTML element for the token.
2504 //
2505 // Switch the insertion mode to "in frameset".
2506 Token::StartTag { tag_name, .. } if tag_name == "frameset" => {
2507 self.errors.push(Error::new(
2508 token_and_info.span,
2509 ErrorKind::StrayStartTag(tag_name.clone()),
2510 ));
2511
2512 let len = self.open_elements_stack.items.len();
2513 let body = self.open_elements_stack.items.get(1);
2514 let is_second_body =
2515 matches!(body, Some(node) if is_html_element!(node, "body"));
2516
2517 if len == 1 || !is_second_body {
2518 // Fragment case
2519 // Ignore the token
2520 return Ok(());
2521 }
2522
2523 if !self.frameset_ok {
2524 // Ignore
2525 return Ok(());
2526 }
2527
2528 if let Some(body) = &body {
2529 if let Some((parent, i)) = self.get_parent_and_index(body) {
2530 parent.children.borrow_mut().remove(i);
2531
2532 body.parent.set(None);
2533 }
2534 }
2535
2536 self.open_elements_stack.items.truncate(1);
2537 self.insert_html_element(token_and_info)?;
2538 self.insertion_mode = InsertionMode::InFrameset;
2539 }
2540 // An end-of-file token
2541 //
2542 // If the stack of template insertion modes is not empty, then process the token
2543 // using the rules for the "in template" insertion mode.
2544 //
2545 // Otherwise, follow these steps:
2546 //
2547 // If there is a node in the stack of open elements that is not either a dd
2548 // element, a dt element, an li element, an optgroup element, an option element,
2549 // a p element, an rb element, an rp element, an rt element, an rtc element, a
2550 // tbody element, a td element, a tfoot element, a th element, a thead element,
2551 // a tr element, the body element, or the html element, then this is a parse
2552 // error.
2553 //
2554 // Stop parsing.
2555 Token::Eof => {
2556 if !self.template_insertion_mode_stack.is_empty() {
2557 self.process_token_using_rules(
2558 token_and_info,
2559 InsertionMode::InTemplate,
2560 )?;
2561
2562 return Ok(());
2563 }
2564
2565 self.update_end_tag_span(
2566 self.open_elements_stack.items.last(),
2567 token_and_info.span,
2568 );
2569
2570 for node in &self.open_elements_stack.items {
2571 if !is_html_element!(
2572 node,
2573 "dd" | "dt"
2574 | "li"
2575 | "optgroup"
2576 | "option"
2577 | "p"
2578 | "rb"
2579 | "rp"
2580 | "rt"
2581 | "rtc"
2582 | "tbody"
2583 | "td"
2584 | "tfoot"
2585 | "th"
2586 | "thead"
2587 | "tr"
2588 | "body"
2589 | "html"
2590 ) {
2591 self.errors.push(Error::new(
2592 token_and_info.span,
2593 ErrorKind::EofWithUnclosedElements,
2594 ));
2595
2596 break;
2597 }
2598 }
2599
2600 self.stopped = true;
2601 }
2602 // An end tag whose tag name is "body"
2603 //
2604 // If the stack of open elements does not have a body element in scope, this is
2605 // a parse error; ignore the token.
2606 //
2607 // Otherwise, if there is a node in the stack of open elements that is not
2608 // either a dd element, a dt element, an li element, an optgroup element, an
2609 // option element, a p element, an rb element, an rp element, an rt element, an
2610 // rtc element, a tbody element, a td element, a tfoot element, a th element, a
2611 // thead element, a tr element, the body element, or the html element, then this
2612 // is a parse error.
2613 //
2614 // Switch the insertion mode to "after body".
2615 Token::EndTag { tag_name, .. } if tag_name == "body" => {
2616 if !self.open_elements_stack.has_in_scope("body") {
2617 self.errors.push(Error::new(
2618 token_and_info.span,
2619 ErrorKind::StrayEndTag(tag_name.clone()),
2620 ));
2621
2622 return Ok(());
2623 } else {
2624 self.update_end_tag_span(
2625 self.open_elements_stack.items.get(1),
2626 token_and_info.span,
2627 );
2628 }
2629
2630 for node in &self.open_elements_stack.items {
2631 if !is_html_element!(
2632 node,
2633 "dd" | "dt"
2634 | "li"
2635 | "optgroup"
2636 | "option"
2637 | "p"
2638 | "rb"
2639 | "rp"
2640 | "rt"
2641 | "rtc"
2642 | "tbody"
2643 | "td"
2644 | "tfoot"
2645 | "th"
2646 | "thead"
2647 | "tr"
2648 | "body"
2649 | "html"
2650 ) {
2651 self.errors.push(Error::new(
2652 token_and_info.span,
2653 ErrorKind::EndTagWithUnclosedElements(atom!("body")),
2654 ));
2655
2656 break;
2657 }
2658 }
2659
2660 self.insertion_mode = InsertionMode::AfterBody;
2661 }
2662 // An end tag whose tag name is "html"
2663 //
2664 // If the stack of open elements does not have a body element in scope, this is
2665 // a parse error; ignore the token.
2666 //
2667 // Otherwise, if there is a node in the stack of open elements that is not
2668 // either a dd element, a dt element, an li element, an optgroup element, an
2669 // option element, a p element, an rb element, an rp element, an rt element, an
2670 // rtc element, a tbody element, a td element, a tfoot element, a th element, a
2671 // thead element, a tr element, the body element, or the html element, then this
2672 // is a parse error.
2673 //
2674 // Switch the insertion mode to "after body".
2675 //
2676 // Reprocess the token.
2677 Token::EndTag { tag_name, .. } if tag_name == "html" => {
2678 if !self.open_elements_stack.has_in_scope("body") {
2679 self.errors.push(Error::new(
2680 token_and_info.span,
2681 ErrorKind::StrayEndTag(tag_name.clone()),
2682 ));
2683
2684 return Ok(());
2685 } else {
2686 self.update_end_tag_span(
2687 self.open_elements_stack.items.first(),
2688 token_and_info.span,
2689 );
2690 }
2691
2692 for node in &self.open_elements_stack.items {
2693 if !is_html_element!(
2694 node,
2695 "dd" | "dt"
2696 | "li"
2697 | "optgroup"
2698 | "option"
2699 | "p"
2700 | "rb"
2701 | "rp"
2702 | "rt"
2703 | "rtc"
2704 | "tbody"
2705 | "td"
2706 | "tfoot"
2707 | "th"
2708 | "thead"
2709 | "tr"
2710 | "body"
2711 | "html"
2712 ) {
2713 self.errors.push(Error::new(
2714 token_and_info.span,
2715 ErrorKind::EndTagWithUnclosedElements(atom!("html")),
2716 ));
2717
2718 break;
2719 }
2720 }
2721
2722 self.insertion_mode = InsertionMode::AfterBody;
2723 self.process_token(token_and_info, None)?;
2724 }
2725 // A start tag whose tag name is one of: "address", "article", "aside",
2726 // "blockquote", "center", "details", "dialog", "dir", "div", "dl", "fieldset",
2727 // "figcaption", "figure", "footer", "header", "hgroup", "main", "menu", "nav",
2728 // "ol", "p", "section", "summary", "ul"
2729 //
2730 // If the stack of open elements has a p element in button scope, then close a p
2731 // element.
2732 //
2733 // Insert an HTML element for the token.
2734 Token::StartTag {
2735 tag_name,
2736 is_self_closing,
2737 ..
2738 } if matches!(
2739 &**tag_name,
2740 "address"
2741 | "article"
2742 | "aside"
2743 | "blockquote"
2744 | "center"
2745 | "details"
2746 | "dialog"
2747 | "dir"
2748 | "div"
2749 | "dl"
2750 | "fieldset"
2751 | "figcaption"
2752 | "figure"
2753 | "footer"
2754 | "header"
2755 | "hgroup"
2756 | "main"
2757 | "menu"
2758 | "nav"
2759 | "ol"
2760 | "p"
2761 | "section"
2762 | "summary"
2763 | "ul"
2764 ) =>
2765 {
2766 if self.open_elements_stack.has_in_button_scope("p") {
2767 self.close_p_element(token_and_info, false);
2768 }
2769
2770 self.insert_html_element(token_and_info)?;
2771 maybe_allow_self_closing!(is_self_closing, tag_name);
2772 }
2773 // A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
2774 //
2775 // If the stack of open elements has a p element in button scope, then close a p
2776 // element.
2777 //
2778 // If the current node is an HTML element whose tag name is one of "h1", "h2",
2779 // "h3", "h4", "h5", or "h6", then this is a parse error; pop the current node
2780 // off the stack of open elements.
2781 //
2782 // Insert an HTML element for the token.
2783 Token::StartTag {
2784 tag_name,
2785 is_self_closing,
2786 ..
2787 } if matches!(&**tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") => {
2788 if self.open_elements_stack.has_in_button_scope("p") {
2789 self.close_p_element(token_and_info, false);
2790 }
2791
2792 match self.open_elements_stack.items.last() {
2793 Some(node)
2794 if is_html_element!(
2795 node,
2796 "h1" | "h2" | "h3" | "h4" | "h5" | "h6"
2797 ) =>
2798 {
2799 self.errors.push(Error::new(
2800 token_and_info.span,
2801 ErrorKind::HeadingWhenHeadingOpen,
2802 ));
2803
2804 self.open_elements_stack.pop();
2805 }
2806 _ => {}
2807 }
2808
2809 self.insert_html_element(token_and_info)?;
2810 maybe_allow_self_closing!(is_self_closing, tag_name);
2811 }
2812 // A start tag whose tag name is one of: "pre", "listing"
2813 //
2814 // If the stack of open elements has a p element in button scope, then close a p
2815 // element.
2816 //
2817 // Insert an HTML element for the token.
2818 //
2819 // If the next token is a U+000A LINE FEED (LF) character token, then ignore
2820 // that token and move on to the next one. (Newlines at the start of pre blocks
2821 // are ignored as an authoring convenience.)
2822 //
2823 // Set the frameset-ok flag to "not ok".
2824 Token::StartTag { tag_name, .. }
2825 if matches!(&**tag_name, "pre" | "listing") =>
2826 {
2827 if self.open_elements_stack.has_in_button_scope("p") {
2828 self.close_p_element(token_and_info, false);
2829 }
2830
2831 self.insert_html_element(token_and_info)?;
2832
2833 match self.input.cur()? {
2834 Some(Token::Character { value, .. }) if *value == '\n' => {
2835 bump!(self);
2836 }
2837 _ => {}
2838 }
2839
2840 self.frameset_ok = false;
2841 }
2842 // A start tag whose tag name is "form"
2843 //
2844 // If the form element pointer is not null, and there is no template element on
2845 // the stack of open elements, then this is a parse error; ignore the token.
2846 //
2847 // Otherwise:
2848 //
2849 // If the stack of open elements has a p element in button scope, then close a p
2850 // element.
2851 //
2852 // Insert an HTML element for the token, and, if there is no template element on
2853 // the stack of open elements, set the form element pointer to point to the
2854 // element created.
2855 Token::StartTag {
2856 tag_name,
2857 is_self_closing,
2858 ..
2859 } if tag_name == "form" => {
2860 if self.form_element_pointer.is_some()
2861 && !self.open_elements_stack.contains_template_element()
2862 {
2863 self.errors
2864 .push(Error::new(token_and_info.span, ErrorKind::FormWhenFormOpen));
2865
2866 return Ok(());
2867 }
2868
2869 if self.open_elements_stack.has_in_button_scope("p") {
2870 self.close_p_element(token_and_info, false);
2871 }
2872
2873 let element = self.insert_html_element(token_and_info)?;
2874
2875 if !self.open_elements_stack.contains_template_element() {
2876 self.form_element_pointer = Some(element);
2877 }
2878
2879 maybe_allow_self_closing!(is_self_closing, tag_name);
2880 }
2881 // A start tag whose tag name is "li"
2882 //
2883 // Run these steps:
2884 //
2885 // Set the frameset-ok flag to "not ok".
2886 //
2887 // Initialize node to be the current node (the bottommost node of the stack).
2888 //
2889 // Loop: If node is an li element, then run these substeps:
2890 //
2891 // Generate implied end tags, except for li elements.
2892 //
2893 // If the current node is not an li element, then this is a parse error.
2894 //
2895 // Pop elements from the stack of open elements until an li element has been
2896 // popped from the stack.
2897 //
2898 // Jump to the step labeled done below.
2899 //
2900 // If node is in the special category, but is not an address, div, or p element,
2901 // then jump to the step labeled done below.
2902 //
2903 // Otherwise, set node to the previous entry in the stack of open elements and
2904 // return to the step labeled loop.
2905 //
2906 // Done: If the stack of open elements has a p element in button scope, then
2907 // close a p element.
2908 //
2909 // Finally, insert an HTML element for the token.
2910 Token::StartTag {
2911 tag_name,
2912 is_self_closing,
2913 ..
2914 } if tag_name == "li" => {
2915 self.frameset_ok = false;
2916
2917 // Initialise node to be the current node (the bottommost node of
2918 // the stack).
2919 // Step "Loop".
2920 for node in self.open_elements_stack.items.iter().rev() {
2921 if is_html_element!(node, "li") {
2922 // Generate implied end tags, except for li elements.
2923 self.open_elements_stack
2924 .generate_implied_end_tags_with_exclusion("li");
2925
2926 // If the current node is not an li element, then this is a
2927 // parse error.
2928 match self.open_elements_stack.items.last() {
2929 Some(node) if !is_html_element!(node, "li") => {
2930 self.errors.push(Error::new(
2931 token_and_info.span,
2932 ErrorKind::UnclosedElementsImplied(atom!("li")),
2933 ));
2934 }
2935 _ => {}
2936 }
2937
2938 // Pop elements from the stack of open elements until an li
2939 // element has been popped from the stack.
2940 self.open_elements_stack.pop_until_tag_name_popped(&["li"]);
2941
2942 // Jump to the step labeled done below.
2943 break;
2944 }
2945
2946 // If node is in the special category, but is not an address,
2947 // div, or p element, then jump to the step labeled done below.
2948 // Otherwise, set node to the previous entry in the stack
2949 // of open elements and return to the step labeled loop.
2950 if self.is_special_element(node)
2951 && !is_html_element!(node, "address" | "div" | "p")
2952 {
2953 break;
2954 }
2955 }
2956
2957 // Step "Done".
2958 // If the stack of open elements has a p element in button scope,
2959 // then close a p element.
2960 if self.open_elements_stack.has_in_button_scope("p") {
2961 self.close_p_element(token_and_info, false);
2962 }
2963
2964 self.insert_html_element(token_and_info)?;
2965 maybe_allow_self_closing!(is_self_closing, tag_name);
2966 }
2967 // A start tag whose tag name is one of: "dd", "dt"
2968 //
2969 // Run these steps:
2970 //
2971 // Set the frameset-ok flag to "not ok".
2972 //
2973 // Initialize node to be the current node (the bottommost node of the stack).
2974 //
2975 // Loop: If node is a dd element, then run these substeps:
2976 //
2977 // Generate implied end tags, except for dd elements.
2978 //
2979 // If the current node is not a dd element, then this is a parse error.
2980 //
2981 // Pop elements from the stack of open elements until a dd element has been
2982 // popped from the stack.
2983 //
2984 // Jump to the step labeled done below.
2985 //
2986 // If node is a dt element, then run these substeps:
2987 //
2988 // Generate implied end tags, except for dt elements.
2989 //
2990 // If the current node is not a dt element, then this is a parse error.
2991 //
2992 // Pop elements from the stack of open elements until a dt element has been
2993 // popped from the stack.
2994 //
2995 // Jump to the step labeled done below.
2996 //
2997 // If node is in the special category, but is not an address, div, or p element,
2998 // then jump to the step labeled done below.
2999 //
3000 // Otherwise, set node to the previous entry in the stack of open elements and
3001 // return to the step labeled loop.
3002 //
3003 // Done: If the stack of open elements has a p element in button scope, then
3004 // close a p element.
3005 //
3006 // Finally, insert an HTML element for the token.
3007 Token::StartTag {
3008 tag_name,
3009 is_self_closing,
3010 ..
3011 } if matches!(&**tag_name, "dd" | "dt") => {
3012 self.frameset_ok = false;
3013
3014 // Initialise node to be the current node (the bottommost node of
3015 // the stack).
3016 // Step "Loop".
3017 for node in self.open_elements_stack.items.iter().rev() {
3018 if is_html_element!(node, "dd") {
3019 // Generate implied end tags, except for dd elements.
3020 self.open_elements_stack
3021 .generate_implied_end_tags_with_exclusion("dd");
3022
3023 // If the current node is not a dd element, then this is a
3024 // parse error.
3025 match self.open_elements_stack.items.last() {
3026 Some(node) if !is_html_element!(node, "dd") => {
3027 self.errors.push(Error::new(
3028 token_and_info.span,
3029 ErrorKind::UnclosedElementsImplied(atom!("dd")),
3030 ));
3031 }
3032 _ => {}
3033 }
3034
3035 // Pop elements from the stack of open elements until a dd
3036 // element has been popped from the stack.
3037 self.open_elements_stack.pop_until_tag_name_popped(&["dd"]);
3038
3039 // Jump to the step labeled done below.
3040 break;
3041 } else if is_html_element!(node, "dt") {
3042 // Generate implied end tags, except for dt elements.
3043 self.open_elements_stack
3044 .generate_implied_end_tags_with_exclusion("dt");
3045
3046 // If the current node is not a dt element, then this is a
3047 // parse error.
3048 match self.open_elements_stack.items.last() {
3049 Some(node) if !is_html_element!(node, "dt") => {
3050 self.errors.push(Error::new(
3051 token_and_info.span,
3052 ErrorKind::UnclosedElementsImplied(atom!("dt")),
3053 ));
3054 }
3055 _ => {}
3056 }
3057
3058 // Pop elements from the stack of open elements until a dt
3059 // element has been popped from the stack.
3060 self.open_elements_stack.pop_until_tag_name_popped(&["dt"]);
3061
3062 // Jump to the step labeled done below.
3063 break;
3064 }
3065
3066 // If node is in the special category, but is not an address,
3067 // div, or p element, then jump to the step labeled done below.
3068 // Otherwise, set node to the previous entry in the stack of
3069 // open elements and return to the step labeled loop.
3070 if self.is_special_element(node)
3071 && !is_html_element!(node, "address" | "div" | "p")
3072 {
3073 break;
3074 }
3075 }
3076
3077 // Step "Done".
3078 // If the stack of open elements has a p element in button scope,
3079 // then close a p element.
3080 if self.open_elements_stack.has_in_button_scope("p") {
3081 self.close_p_element(token_and_info, false);
3082 }
3083
3084 self.insert_html_element(token_and_info)?;
3085 maybe_allow_self_closing!(is_self_closing, tag_name);
3086 }
3087 // A start tag whose tag name is "plaintext"
3088 //
3089 // If the stack of open elements has a p element in button scope, then close a p
3090 // element.
3091 //
3092 // Insert an HTML element for the token.
3093 //
3094 // Switch the tokenizer to the PLAINTEXT state.
3095 Token::StartTag { tag_name, .. } if tag_name == "plaintext" => {
3096 if self.open_elements_stack.has_in_button_scope("p") {
3097 self.close_p_element(token_and_info, false);
3098 }
3099
3100 self.insert_html_element(token_and_info)?;
3101 self.input.set_input_state(State::PlainText);
3102 }
3103 // A start tag whose tag name is "button"
3104 //
3105 // 1. If the stack of open elements has a button element in scope, then run
3106 // these substeps:
3107 //
3108 // 1. Parse error.
3109 //
3110 // 2. Generate implied end tags.
3111 //
3112 // 3. Pop elements from the stack of open elements until a button element has
3113 // been popped from the stack.
3114 //
3115 // 2. Reconstruct the active formatting elements, if any.
3116 //
3117 // 3. Insert an HTML element for the token.
3118 //
3119 // 4. Set the frameset-ok flag to "not ok".
3120 Token::StartTag { tag_name, .. } if tag_name == "button" => {
3121 if self.open_elements_stack.has_in_scope("button") {
3122 self.errors.push(Error::new(
3123 token_and_info.span,
3124 ErrorKind::SomethingSeenWhenSomethingOpen(tag_name.clone()),
3125 ));
3126 self.open_elements_stack.generate_implied_end_tags();
3127 self.open_elements_stack
3128 .pop_until_tag_name_popped(&["button"]);
3129 }
3130
3131 self.reconstruct_active_formatting_elements()?;
3132 self.insert_html_element(token_and_info)?;
3133 self.frameset_ok = false;
3134 }
3135 // An end tag whose tag name is one of: "address", "article", "aside",
3136 // "blockquote", "button", "center", "details", "dialog", "dir", "div", "dl",
3137 // "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing",
3138 // "main", "menu", "nav", "ol", "pre", "section", "summary", "ul"
3139 //
3140 // If the stack of open elements does not have an element in scope that is an
3141 // HTML element with the same tag name as that of the token, then this is a
3142 // parse error; ignore the token.
3143 //
3144 // Otherwise, run these steps:
3145 //
3146 // Generate implied end tags.
3147 //
3148 // If the current node is not an HTML element with the same tag name as that of
3149 // the token, then this is a parse error.
3150 //
3151 // Pop elements from the stack of open elements until an HTML element with the
3152 // same tag name as the token has been popped from the stack.
3153 Token::EndTag { tag_name, .. }
3154 if matches!(
3155 &**tag_name,
3156 "address"
3157 | "article"
3158 | "aside"
3159 | "blockquote"
3160 | "button"
3161 | "center"
3162 | "details"
3163 | "dialog"
3164 | "dir"
3165 | "div"
3166 | "dl"
3167 | "fieldset"
3168 | "figcaption"
3169 | "figure"
3170 | "footer"
3171 | "header"
3172 | "hgroup"
3173 | "listing"
3174 | "main"
3175 | "menu"
3176 | "nav"
3177 | "ol"
3178 | "pre"
3179 | "section"
3180 | "summary"
3181 | "ul"
3182 ) =>
3183 {
3184 if !self.open_elements_stack.has_in_scope(tag_name) {
3185 self.errors.push(Error::new(
3186 token_and_info.span,
3187 ErrorKind::StrayEndTag(tag_name.clone()),
3188 ));
3189 } else {
3190 self.open_elements_stack.generate_implied_end_tags();
3191
3192 match self.open_elements_stack.items.last() {
3193 Some(node) if !is_html_element_with_tag_name!(node, tag_name) => {
3194 self.errors.push(Error::new(
3195 token_and_info.span,
3196 ErrorKind::UnclosedElements(tag_name.clone()),
3197 ));
3198 }
3199 _ => {}
3200 }
3201
3202 let popped = self
3203 .open_elements_stack
3204 .pop_until_tag_name_popped(&[tag_name]);
3205
3206 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
3207 }
3208 }
3209 // An end tag whose tag name is "form"
3210 //
3211 // If there is no template element on the stack of open elements, then run these
3212 // substeps:
3213 //
3214 // 1. Let node be the element that the form element pointer is set to, or null
3215 // if it is not set to an element.
3216 //
3217 // 2. Set the form element pointer to null.
3218 //
3219 // 3. If node is null or if the stack of open elements does not have node in
3220 // scope, then this is a parse error; return and ignore the
3221 // token.
3222 //
3223 // 4. Generate implied end tags.
3224 //
3225 // 5. If the current node is not node, then this is a parse error.
3226 //
3227 // 6. Remove node from the stack of open elements.
3228 //
3229 // If there is a template element on the stack of open elements, then run these
3230 // substeps instead:
3231 //
3232 // 1. If the stack of open elements does not have a form element in scope, then
3233 // this is a parse error; return and ignore the token.
3234 //
3235 // 2. Generate implied end tags.
3236 //
3237 // 3. If the current node is not a form element, then this is a parse error.
3238 //
3239 // 4. Pop elements from the stack of open elements until a form element has been
3240 // popped from the stack.
3241 Token::EndTag { tag_name, .. } if tag_name == "form" => {
3242 if !self.open_elements_stack.contains_template_element() {
3243 let node = match self.form_element_pointer.take() {
3244 None => {
3245 self.errors.push(Error::new(
3246 token_and_info.span,
3247 ErrorKind::StrayEndTag(tag_name.clone()),
3248 ));
3249
3250 return Ok(());
3251 }
3252 Some(x) => Some(x),
3253 };
3254
3255 self.form_element_pointer = None;
3256
3257 if node.is_none()
3258 || !self
3259 .open_elements_stack
3260 .has_node_in_scope(node.as_ref().unwrap())
3261 {
3262 self.errors.push(Error::new(
3263 token_and_info.span,
3264 ErrorKind::StrayEndTag(tag_name.clone()),
3265 ));
3266
3267 return Ok(());
3268 }
3269
3270 let node = node.unwrap();
3271
3272 self.open_elements_stack.generate_implied_end_tags();
3273
3274 let current = self.open_elements_stack.items.last();
3275
3276 if !is_same_node(&node, current.unwrap()) {
3277 self.errors.push(Error::new(
3278 token_and_info.span,
3279 ErrorKind::UnclosedElements(tag_name.clone()),
3280 ));
3281 } else {
3282 self.update_end_tag_span(Some(&node), token_and_info.span);
3283 }
3284
3285 self.open_elements_stack.remove(&node);
3286 } else {
3287 if !self.open_elements_stack.has_in_scope("form") {
3288 self.errors.push(Error::new(
3289 token_and_info.span,
3290 ErrorKind::StrayEndTag(tag_name.clone()),
3291 ));
3292
3293 return Ok(());
3294 }
3295
3296 self.open_elements_stack.generate_implied_end_tags();
3297
3298 match self.open_elements_stack.items.last() {
3299 Some(node) if !is_html_element!(node, "form") => {
3300 self.errors.push(Error::new(
3301 token_and_info.span,
3302 ErrorKind::UnclosedElements(tag_name.clone()),
3303 ));
3304 }
3305 _ => {}
3306 }
3307
3308 let popped = self
3309 .open_elements_stack
3310 .pop_until_tag_name_popped(&["form"]);
3311
3312 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
3313 }
3314 }
3315 // An end tag whose tag name is "p"
3316 //
3317 // If the stack of open elements does not have a p element in button scope, then
3318 // this is a parse error; insert an HTML element for a "p" start tag token with
3319 // no attributes.
3320 //
3321 // Close a p element.
3322 Token::EndTag { tag_name, .. } if tag_name == "p" => {
3323 if !self.open_elements_stack.has_in_button_scope("p") {
3324 self.errors.push(Error::new(
3325 token_and_info.span,
3326 ErrorKind::NoElementToCloseButEndTagSeen(tag_name.clone()),
3327 ));
3328
3329 self.insert_html_element(
3330 &self.create_fake_token_and_info("p", Some(token_and_info.span)),
3331 )?;
3332 }
3333
3334 self.close_p_element(token_and_info, true);
3335 }
3336 // An end tag whose tag name is "li"
3337 //
3338 // If the stack of open elements does not have an li element in list item scope,
3339 // then this is a parse error; ignore the token.
3340 //
3341 // Otherwise, run these steps:
3342 //
3343 // Generate implied end tags, except for li elements.
3344 //
3345 // If the current node is not an li element, then this is a parse error.
3346 //
3347 // Pop elements from the stack of open elements until an li element has been
3348 // popped from the stack.
3349 Token::EndTag { tag_name, .. } if tag_name == "li" => {
3350 if !self.open_elements_stack.has_in_list_item_scope("li") {
3351 self.errors.push(Error::new(
3352 token_and_info.span,
3353 ErrorKind::NoElementToCloseButEndTagSeen(tag_name.clone()),
3354 ));
3355 } else {
3356 self.open_elements_stack
3357 .generate_implied_end_tags_with_exclusion("li");
3358
3359 match self.open_elements_stack.items.last() {
3360 Some(node) if !is_html_element!(node, "li") => {
3361 self.errors.push(Error::new(
3362 token_and_info.span,
3363 ErrorKind::UnclosedElements(tag_name.clone()),
3364 ));
3365 }
3366 _ => {}
3367 }
3368
3369 let popped =
3370 self.open_elements_stack.pop_until_tag_name_popped(&["li"]);
3371
3372 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
3373 }
3374 }
3375 // An end tag whose tag name is one of: "dd", "dt"
3376 //
3377 // If the stack of open elements does not have an element in scope that is an
3378 // HTML element with the same tag name as that of the token, then this is a
3379 // parse error; ignore the token.
3380 //
3381 // Otherwise, run these steps:
3382 //
3383 // Generate implied end tags, except for HTML elements with the same tag name as
3384 // the token.
3385 //
3386 // If the current node is not an HTML element with the same tag name as that of
3387 // the token, then this is a parse error.
3388 //
3389 // Pop elements from the stack of open elements until an HTML element with the
3390 // same tag name as the token has been popped from the stack.
3391 Token::EndTag { tag_name, .. } if matches!(&**tag_name, "dd" | "dt") => {
3392 if !self.open_elements_stack.has_in_scope(tag_name) {
3393 self.errors.push(Error::new(
3394 token_and_info.span,
3395 ErrorKind::NoElementToCloseButEndTagSeen(tag_name.clone()),
3396 ));
3397 } else {
3398 self.open_elements_stack
3399 .generate_implied_end_tags_with_exclusion(tag_name);
3400
3401 match self.open_elements_stack.items.last() {
3402 Some(node) if !is_html_element_with_tag_name!(node, tag_name) => {
3403 self.errors.push(Error::new(
3404 token_and_info.span,
3405 ErrorKind::UnclosedElements(tag_name.clone()),
3406 ));
3407 }
3408 _ => {}
3409 }
3410
3411 let popped = self
3412 .open_elements_stack
3413 .pop_until_tag_name_popped(&[tag_name]);
3414
3415 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
3416 }
3417 }
3418 // An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6"
3419 //
3420 // If the stack of open elements does not have an element in scope that is an
3421 // HTML element and whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
3422 // "h6", then this is a parse error; ignore the token.
3423 //
3424 // Otherwise, run these steps:
3425 //
3426 // Generate implied end tags.
3427 //
3428 // If the current node is not an HTML element with the same tag name as that of
3429 // the token, then this is a parse error.
3430 //
3431 // Pop elements from the stack of open elements until an HTML element whose tag
3432 // name is one of "h1", "h2", "h3", "h4", "h5", or "h6" has been popped from the
3433 // stack.
3434 Token::EndTag { tag_name, .. }
3435 if matches!(&**tag_name, "h1" | "h2" | "h3" | "h4" | "h5" | "h6") =>
3436 {
3437 if !self.open_elements_stack.has_in_scope("h1")
3438 && !self.open_elements_stack.has_in_scope("h2")
3439 && !self.open_elements_stack.has_in_scope("h3")
3440 && !self.open_elements_stack.has_in_scope("h4")
3441 && !self.open_elements_stack.has_in_scope("h5")
3442 && !self.open_elements_stack.has_in_scope("h6")
3443 {
3444 self.errors.push(Error::new(
3445 token_and_info.span,
3446 ErrorKind::StrayEndTag(tag_name.clone()),
3447 ));
3448 } else {
3449 self.open_elements_stack.generate_implied_end_tags();
3450
3451 if let Some(node) = self.open_elements_stack.items.last() {
3452 if !is_html_element_with_tag_name!(node, tag_name) {
3453 self.errors.push(Error::new(
3454 token_and_info.span,
3455 ErrorKind::UnclosedElements(tag_name.clone()),
3456 ));
3457 } else {
3458 self.update_end_tag_span(Some(node), token_and_info.span);
3459 }
3460 }
3461
3462 self.open_elements_stack
3463 .pop_until_tag_name_popped(&["h1", "h2", "h3", "h4", "h5", "h6"]);
3464 }
3465 }
3466 // An end tag whose tag name is "sarcasm"
3467 //
3468 // Take a deep breath, then act as described in the "any other end tag" entry
3469 // below.
3470 //
3471 // Skip, we will be in `Token::EndTag` branch with the same logic
3472 //
3473 //
3474 //
3475 // A start tag whose tag name is "a"
3476 //
3477 // If the list of active formatting elements contains an a element between the
3478 // end of the list and the last marker on the list (or the start of the list if
3479 // there is no marker on the list), then this is a parse error; run the adoption
3480 // agency algorithm for the token, then remove that element from the list of
3481 // active formatting elements and the stack of open elements if the adoption
3482 // agency algorithm didn't already remove it (it might not have if the element
3483 // is not in table scope).
3484 //
3485 // Reconstruct the active formatting elements, if any.
3486 //
3487 // Insert an HTML element for the token. Push onto the list of active formatting
3488 // elements that element.
3489 Token::StartTag {
3490 tag_name,
3491 is_self_closing,
3492 ..
3493 } if tag_name == "a" => {
3494 if !self.active_formatting_elements.items.is_empty() {
3495 let mut node = None;
3496
3497 for element in self.active_formatting_elements.items.iter().rev() {
3498 match element {
3499 ActiveFormattingElement::Marker => {
3500 break;
3501 }
3502 ActiveFormattingElement::Element(item, _) => {
3503 if is_html_element!(item, "a") {
3504 node = Some(item);
3505
3506 break;
3507 }
3508 }
3509 }
3510 }
3511
3512 if let Some(element) = node {
3513 self.errors.push(Error::new(
3514 token_and_info.span,
3515 ErrorKind::SomethingSeenWhenSomethingOpen(tag_name.clone()),
3516 ));
3517
3518 let remove = element.clone();
3519
3520 self.run_the_adoption_agency_algorithm(token_and_info, false)?;
3521 self.active_formatting_elements.remove(&remove);
3522 self.open_elements_stack.remove(&remove);
3523 }
3524 }
3525
3526 self.reconstruct_active_formatting_elements()?;
3527
3528 let element = self.insert_html_element(token_and_info)?;
3529
3530 self.active_formatting_elements
3531 .push(ActiveFormattingElement::Element(
3532 element,
3533 token_and_info.clone(),
3534 ));
3535
3536 maybe_allow_self_closing!(is_self_closing, tag_name);
3537 }
3538 // A start tag whose tag name is one of: "b", "big", "code", "em", "font", "i",
3539 // "s", "small", "strike", "strong", "tt", "u"
3540 //
3541 // Reconstruct the active formatting elements, if any.
3542 //
3543 // Insert an HTML element for the token. Push onto the list of active formatting
3544 // elements that element.
3545 Token::StartTag {
3546 tag_name,
3547 is_self_closing,
3548 ..
3549 } if matches!(
3550 &**tag_name,
3551 "b" | "big"
3552 | "code"
3553 | "em"
3554 | "font"
3555 | "i"
3556 | "s"
3557 | "small"
3558 | "strike"
3559 | "strong"
3560 | "tt"
3561 | "u"
3562 ) =>
3563 {
3564 self.reconstruct_active_formatting_elements()?;
3565
3566 let element = self.insert_html_element(token_and_info)?;
3567
3568 self.active_formatting_elements
3569 .push(ActiveFormattingElement::Element(
3570 element,
3571 token_and_info.clone(),
3572 ));
3573
3574 maybe_allow_self_closing!(is_self_closing, tag_name);
3575 }
3576 // A start tag whose tag name is "nobr"
3577 //
3578 // Reconstruct the active formatting elements, if any.
3579 //
3580 // If the stack of open elements has a nobr element in scope, then this is a
3581 // parse error; run the adoption agency algorithm for the token, then once again
3582 // reconstruct the active formatting elements, if any.
3583 //
3584 // Insert an HTML element for the token. Push onto the list of active formatting
3585 // elements that element.
3586 Token::StartTag {
3587 tag_name,
3588 is_self_closing,
3589 ..
3590 } if tag_name == "nobr" => {
3591 self.reconstruct_active_formatting_elements()?;
3592
3593 if self.open_elements_stack.has_in_scope("nobr") {
3594 self.errors.push(Error::new(
3595 token_and_info.span,
3596 ErrorKind::SomethingSeenWhenSomethingOpen(tag_name.clone()),
3597 ));
3598
3599 self.run_the_adoption_agency_algorithm(token_and_info, false)?;
3600 self.reconstruct_active_formatting_elements()?;
3601 }
3602
3603 let element = self.insert_html_element(token_and_info)?;
3604
3605 self.active_formatting_elements
3606 .push(ActiveFormattingElement::Element(
3607 element,
3608 token_and_info.clone(),
3609 ));
3610 maybe_allow_self_closing!(is_self_closing, tag_name);
3611 }
3612 // An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font",
3613 // "i", "nobr", "s", "small", "strike", "strong", "tt", "u"
3614 //
3615 // Run the adoption agency algorithm for the token.
3616 Token::EndTag { tag_name, .. }
3617 if matches!(
3618 &**tag_name,
3619 "a" | "b"
3620 | "big"
3621 | "code"
3622 | "em"
3623 | "font"
3624 | "i"
3625 | "nobr"
3626 | "s"
3627 | "small"
3628 | "strike"
3629 | "strong"
3630 | "tt"
3631 | "u"
3632 ) =>
3633 {
3634 self.run_the_adoption_agency_algorithm(token_and_info, true)?;
3635 }
3636 // A start tag whose tag name is one of: "applet", "marquee", "object"
3637 //
3638 // Reconstruct the active formatting elements, if any.
3639 //
3640 // Insert an HTML element for the token.
3641 //
3642 // Insert a marker at the end of the list of active formatting elements.
3643 //
3644 // Set the frameset-ok flag to "not ok".
3645 Token::StartTag { tag_name, .. }
3646 if matches!(&**tag_name, "applet" | "marquee" | "object") =>
3647 {
3648 self.reconstruct_active_formatting_elements()?;
3649 self.insert_html_element(token_and_info)?;
3650 self.active_formatting_elements.insert_marker();
3651 self.frameset_ok = false;
3652 }
3653 // An end tag token whose tag name is one of: "applet", "marquee", "object"
3654 //
3655 // If the stack of open elements does not have an element in scope that is an
3656 // HTML element with the same tag name as that of the token, then this is a
3657 // parse error; ignore the token.
3658 //
3659 // Otherwise, run these steps:
3660 //
3661 // Generate implied end tags.
3662 //
3663 // If the current node is not an HTML element with the same tag name as that of
3664 // the token, then this is a parse error.
3665 //
3666 // Pop elements from the stack of open elements until an HTML element with the
3667 // same tag name as the token has been popped from the stack.
3668 //
3669 // Clear the list of active formatting elements up to the last marker.
3670 Token::EndTag { tag_name, .. }
3671 if matches!(&**tag_name, "applet" | "marquee" | "object") =>
3672 {
3673 if !self.open_elements_stack.has_in_scope(tag_name) {
3674 self.errors.push(Error::new(
3675 token_and_info.span,
3676 ErrorKind::StrayEndTag(tag_name.clone()),
3677 ));
3678 } else {
3679 self.open_elements_stack.generate_implied_end_tags();
3680
3681 match self.open_elements_stack.items.last() {
3682 Some(node) if !is_html_element_with_tag_name!(node, tag_name) => {
3683 self.errors.push(Error::new(
3684 token_and_info.span,
3685 ErrorKind::UnclosedElements(tag_name.clone()),
3686 ));
3687 }
3688 _ => {}
3689 }
3690
3691 let popped = self
3692 .open_elements_stack
3693 .pop_until_tag_name_popped(&[tag_name]);
3694
3695 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
3696 self.active_formatting_elements.clear_to_last_marker();
3697 }
3698 }
3699 // A start tag whose tag name is "table"
3700 //
3701 // If the Document is not set to quirks mode, and the stack of open elements has
3702 // a p element in button scope, then close a p element.
3703 //
3704 // Insert an HTML element for the token.
3705 //
3706 // Set the frameset-ok flag to "not ok".
3707 //
3708 // Switch the insertion mode to "in table".
3709 Token::StartTag {
3710 tag_name,
3711 is_self_closing,
3712 ..
3713 } if tag_name == "table" => {
3714 if get_document_mode!(self.document.as_ref().unwrap())
3715 != DocumentMode::Quirks
3716 && self.open_elements_stack.has_in_button_scope("p")
3717 {
3718 self.close_p_element(token_and_info, false);
3719 }
3720
3721 self.insert_html_element(token_and_info)?;
3722 self.frameset_ok = false;
3723 self.insertion_mode = InsertionMode::InTable;
3724 maybe_allow_self_closing!(is_self_closing, tag_name);
3725 }
3726 // An end tag whose tag name is "br"
3727 //
3728 // Parse error. Drop the attributes from the token, and act as described in the
3729 // next entry; i.e. act as if this was a "br" start tag token with no
3730 // attributes, rather than the end tag token that it actually is.
3731 Token::EndTag {
3732 tag_name,
3733 is_self_closing,
3734 ..
3735 } if tag_name == "br" => {
3736 let is_self_closing = *is_self_closing;
3737
3738 self.errors
3739 .push(Error::new(token_and_info.span, ErrorKind::EndTagBr));
3740
3741 self.reconstruct_active_formatting_elements()?;
3742 self.insert_html_element(
3743 &self.create_fake_token_and_info("br", Some(token_and_info.span)),
3744 )?;
3745 self.open_elements_stack.pop();
3746
3747 if is_self_closing {
3748 token_and_info.acknowledged = true;
3749 }
3750
3751 self.frameset_ok = false;
3752 }
3753 // A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen",
3754 // "wbr"
3755 //
3756 // Reconstruct the active formatting elements, if any.
3757 //
3758 // Insert an HTML element for the token. Immediately pop the current node off
3759 // the stack of open elements.
3760 //
3761 // Acknowledge the token's self-closing flag, if it is set.
3762 //
3763 // Set the frameset-ok flag to "not ok".
3764 Token::StartTag {
3765 tag_name,
3766 is_self_closing,
3767 ..
3768 } if matches!(
3769 &**tag_name,
3770 "area" | "br" | "embed" | "img" | "keygen" | "wbr"
3771 ) =>
3772 {
3773 let is_self_closing = *is_self_closing;
3774
3775 self.reconstruct_active_formatting_elements()?;
3776 self.insert_html_element(token_and_info)?;
3777 self.open_elements_stack.pop();
3778
3779 if is_self_closing {
3780 token_and_info.acknowledged = true;
3781 }
3782
3783 self.frameset_ok = false;
3784 }
3785 // A start tag whose tag name is "input"
3786 //
3787 // Reconstruct the active formatting elements, if any.
3788 //
3789 // Insert an HTML element for the token. Immediately pop the current node off
3790 // the stack of open elements.
3791 //
3792 // Acknowledge the token's self-closing flag, if it is set.
3793 //
3794 // If the token does not have an attribute with the name "type", or if it does,
3795 // but that attribute's value is not an ASCII case-insensitive match for the
3796 // string "hidden", then: set the frameset-ok flag to "not ok".
3797 Token::StartTag {
3798 tag_name,
3799 is_self_closing,
3800 attributes,
3801 ..
3802 } if tag_name == "input" => {
3803 let is_self_closing = *is_self_closing;
3804 let input_type =
3805 attributes.iter().find(|attribute| attribute.name == "type");
3806 let is_hidden = match &input_type {
3807 Some(input_type) => match &input_type.value {
3808 Some(value) if value.as_ref().eq_ignore_ascii_case("hidden") => {
3809 true
3810 }
3811 _ => false,
3812 },
3813 _ => false,
3814 };
3815
3816 self.reconstruct_active_formatting_elements()?;
3817
3818 // Set the frameset-ok flag before inserting the element to avoid extra cloning; the order has no effect on the logic
3819 if input_type.is_none() || !is_hidden {
3820 self.frameset_ok = false;
3821 }
3822
3823 self.insert_html_element(token_and_info)?;
3824 self.open_elements_stack.pop();
3825
3826 if is_self_closing {
3827 token_and_info.acknowledged = true;
3828 }
3829 }
3830 // A start tag whose tag name is one of: "param", "source", "track"
3831 //
3832 // Insert an HTML element for the token. Immediately pop the current node off
3833 // the stack of open elements.
3834 //
3835 // Acknowledge the token's self-closing flag, if it is set.
3836 Token::StartTag {
3837 tag_name,
3838 is_self_closing,
3839 ..
3840 } if matches!(&**tag_name, "param" | "source" | "track") => {
3841 let is_self_closing = *is_self_closing;
3842
3843 self.insert_html_element(token_and_info)?;
3844 self.open_elements_stack.pop();
3845
3846 if is_self_closing {
3847 token_and_info.acknowledged = true;
3848 }
3849 }
3850 // A start tag whose tag name is "hr"
3851 //
3852 // If the stack of open elements has a p element in button scope, then close a p
3853 // element.
3854 //
3855 // Insert an HTML element for the token. Immediately pop the current node off
3856 // the stack of open elements.
3857 //
3858 // Acknowledge the token's self-closing flag, if it is set.
3859 //
3860 // Set the frameset-ok flag to "not ok".
3861 Token::StartTag {
3862 tag_name,
3863 is_self_closing,
3864 ..
3865 } if tag_name == "hr" => {
3866 let is_self_closing = *is_self_closing;
3867
3868 if self.open_elements_stack.has_in_button_scope("p") {
3869 self.close_p_element(token_and_info, false);
3870 }
3871
3872 self.insert_html_element(token_and_info)?;
3873 self.open_elements_stack.pop();
3874
3875 if is_self_closing {
3876 token_and_info.acknowledged = true;
3877 }
3878
3879 self.frameset_ok = false;
3880 }
3881 // A start tag whose tag name is "image"
3882 //
3883 // Parse error. Change the token's tag name to "img" and reprocess it. (Don't
3884 // ask.)
3885 Token::StartTag { tag_name, .. } if tag_name == "image" => {
3886 self.errors.push(Error::new(
3887 token_and_info.span,
3888 ErrorKind::UnexpectedImageStartTag,
3889 ));
3890
3891 match token_and_info {
3892 TokenAndInfo {
3893 token: Token::StartTag { tag_name, .. },
3894 ..
3895 } => {
3896 *tag_name = atom!("img");
3897 }
3898 _ => {
3899 unreachable!();
3900 }
3901 }
3902
3903 self.process_token(token_and_info, None)?;
3904 }
3905 // A start tag whose tag name is "textarea"
3906 //
3907 // Run these steps:
3908 //
3909 // Insert an HTML element for the token.
3910 //
3911 // If the next token is a U+000A LINE FEED (LF) character token, then ignore
3912 // that token and move on to the next one. (Newlines at the start of textarea
3913 // elements are ignored as an authoring convenience.)
3914 //
3915 // Switch the tokenizer to the RCDATA state.
3916 //
3917 // Let the original insertion mode be the current insertion mode.
3918 //
3919 // Set the frameset-ok flag to "not ok".
3920 //
3921 // Switch the insertion mode to "text".
3922 Token::StartTag { tag_name, .. } if tag_name == "textarea" => {
3923 self.insert_html_element(token_and_info)?;
3924
3925 // To prevent parsing more tokens in lexer we set state before taking
3926 self.input.set_input_state(State::Rcdata);
3927
3928 match self.input.cur()? {
3929 Some(Token::Character { value, .. }) if *value == '\x0A' => {
3930 bump!(self);
3931 }
3932 _ => {}
3933 };
3934
3935 self.original_insertion_mode = self.insertion_mode.clone();
3936 self.frameset_ok = false;
3937 self.insertion_mode = InsertionMode::Text;
3938 }
3939 // A start tag whose tag name is "xmp"
3940 //
3941 // If the stack of open elements has a p element in button scope, then close a p
3942 // element.
3943 //
3944 // Reconstruct the active formatting elements, if any.
3945 //
3946 // Set the frameset-ok flag to "not ok".
3947 //
3948 // Follow the generic raw text element parsing algorithm.
3949 Token::StartTag {
3950 tag_name,
3951 is_self_closing,
3952 ..
3953 } if tag_name == "xmp" => {
3954 if self.open_elements_stack.has_in_button_scope("p") {
3955 self.close_p_element(token_and_info, false);
3956 }
3957
3958 self.reconstruct_active_formatting_elements()?;
3959 self.frameset_ok = false;
3960 self.parse_generic_text_element(token_and_info, true)?;
3961 maybe_allow_self_closing!(is_self_closing, tag_name);
3962 }
3963 // A start tag whose tag name is "iframe"
3964 //
3965 // Set the frameset-ok flag to "not ok".
3966 //
3967 // Follow the generic raw text element parsing algorithm.
3968 Token::StartTag {
3969 tag_name,
3970 is_self_closing,
3971 ..
3972 } if tag_name == "iframe" => {
3973 self.frameset_ok = false;
3974 self.parse_generic_text_element(token_and_info, true)?;
3975 maybe_allow_self_closing!(is_self_closing, tag_name);
3976 }
3977 // A start tag whose tag name is "noembed"
3978 //
3979 // A start tag whose tag name is "noscript", if the scripting flag is enabled
3980 //
3981 // Follow the generic raw text element parsing algorithm.
3982 Token::StartTag {
3983 tag_name,
3984 is_self_closing,
3985 ..
3986 } if tag_name == "noembed"
3987 || (tag_name == "noscript" && self.config.scripting_enabled) =>
3988 {
3989 self.parse_generic_text_element(token_and_info, true)?;
3990 maybe_allow_self_closing!(is_self_closing, tag_name);
3991 }
3992 // A start tag whose tag name is "select"
3993 //
3994 // Reconstruct the active formatting elements, if any.
3995 //
3996 // Insert an HTML element for the token.
3997 //
3998 // Set the frameset-ok flag to "not ok".
3999 //
4000 // If the insertion mode is one of "in table", "in caption", "in table body",
4001 // "in row", or "in cell", then switch the insertion mode to "in select in
4002 // table". Otherwise, switch the insertion mode to "in select".
4003 Token::StartTag {
4004 tag_name,
4005 is_self_closing,
4006 ..
4007 } if tag_name == "select" => {
4008 self.reconstruct_active_formatting_elements()?;
4009 self.insert_html_element(token_and_info)?;
4010 self.frameset_ok = false;
4011
4012 match self.insertion_mode {
4013 InsertionMode::InTable
4014 | InsertionMode::InCaption
4015 | InsertionMode::InTableBody
4016 | InsertionMode::InRow
4017 | InsertionMode::InCell => {
4018 self.insertion_mode = InsertionMode::InSelectInTable;
4019 }
4020 _ => {
4021 self.insertion_mode = InsertionMode::InSelect;
4022 }
4023 }
4024
4025 maybe_allow_self_closing!(is_self_closing, tag_name);
4026 }
4027 // A start tag whose tag name is one of: "optgroup", "option"
4028 //
4029 // If the current node is an option element, then pop the current node off the
4030 // stack of open elements.
4031 //
4032 // Reconstruct the active formatting elements, if any.
4033 //
4034 // Insert an HTML element for the token.
4035 Token::StartTag {
4036 tag_name,
4037 is_self_closing,
4038 ..
4039 } if matches!(&**tag_name, "optgroup" | "option") => {
4040 match self.open_elements_stack.items.last() {
4041 Some(node) if is_html_element!(node, "option") => {
4042 self.open_elements_stack.pop();
4043 }
4044 _ => {}
4045 }
4046
4047 self.reconstruct_active_formatting_elements()?;
4048 self.insert_html_element(token_and_info)?;
4049 maybe_allow_self_closing!(is_self_closing, tag_name);
4050 }
4051 // A start tag whose tag name is one of: "rb", "rtc"
4052 //
4053 // If the stack of open elements has a ruby element in scope, then generate
4054 // implied end tags. If the current node is not now a ruby element, this is a
4055 // parse error.
4056 //
4057 // Insert an HTML element for the token.
4058 Token::StartTag {
4059 tag_name,
4060 is_self_closing,
4061 ..
4062 } if matches!(&**tag_name, "rb" | "rtc") => {
4063 let is_scope = self.open_elements_stack.has_in_scope("ruby");
4064
4065 if is_scope {
4066 self.open_elements_stack.generate_implied_end_tags();
4067 }
4068
4069 match self.open_elements_stack.items.last() {
4070 Some(node) if !is_html_element!(node, "ruby") => {
4071 if !is_scope {
4072 self.errors.push(Error::new(
4073 token_and_info.span,
4074 ErrorKind::StartTagSeenWithoutRuby(tag_name.clone()),
4075 ));
4076 } else {
4077 self.errors.push(Error::new(
4078 token_and_info.span,
4079 ErrorKind::UnclosedChildrenInRuby,
4080 ));
4081 }
4082 }
4083 _ => {}
4084 }
4085
4086 self.insert_html_element(token_and_info)?;
4087 maybe_allow_self_closing!(is_self_closing, tag_name);
4088 }
4089 // A start tag whose tag name is one of: "rp", "rt"
4090 //
4091 // If the stack of open elements has a ruby element in scope, then generate
4092 // implied end tags, except for rtc elements. If the current node is not now a
4093 // rtc element or a ruby element, this is a parse error.
4094 //
4095 // Insert an HTML element for the token.
4096 Token::StartTag {
4097 tag_name,
4098 is_self_closing,
4099 ..
4100 } if matches!(&**tag_name, "rp" | "rt") => {
4101 let in_scope = self.open_elements_stack.has_in_scope("ruby");
4102
4103 if in_scope {
4104 self.open_elements_stack
4105 .generate_implied_end_tags_with_exclusion("rtc");
4106 }
4107
4108 match self.open_elements_stack.items.last() {
4109 Some(node) if !is_html_element!(node, "rtc" | "ruby") => {
4110 if !in_scope {
4111 self.errors.push(Error::new(
4112 token_and_info.span,
4113 ErrorKind::StartTagSeenWithoutRuby(tag_name.clone()),
4114 ));
4115 } else {
4116 self.errors.push(Error::new(
4117 token_and_info.span,
4118 ErrorKind::UnclosedChildrenInRuby,
4119 ));
4120 }
4121 }
4122 _ => {}
4123 }
4124
4125 self.insert_html_element(token_and_info)?;
4126 maybe_allow_self_closing!(is_self_closing, tag_name);
4127 }
4128 // A start tag whose tag name is "math"
4129 //
4130 // Reconstruct the active formatting elements, if any.
4131 //
4132 // Adjust MathML attributes for the token. (This fixes the case of MathML
4133 // attributes that are not all lowercase.)
4134 //
4135 // Adjust foreign attributes for the token. (This fixes the use of namespaced
4136 // attributes, in particular XLink.)
4137 //
4138 // Insert a foreign element for the token, in the MathML namespace.
4139 //
4140 // If the token has its self-closing flag set, pop the current node off the
4141 // stack of open elements and acknowledge the token's self-closing flag.
4142 Token::StartTag {
4143 tag_name,
4144 is_self_closing,
4145 ..
4146 } if tag_name == "math" => {
4147 let is_self_closing = *is_self_closing;
4148
4149 self.reconstruct_active_formatting_elements()?;
4150 self.insert_foreign_element(
4151 token_and_info,
4152 Namespace::MATHML,
4153 Some(AdjustAttributes::MathML),
4154 )?;
4155
4156 if is_self_closing {
4157 self.open_elements_stack.pop();
4158
4159 token_and_info.acknowledged = true;
4160 }
4161 }
4162 // A start tag whose tag name is "svg"
4163 //
4164 // Reconstruct the active formatting elements, if any.
4165 //
4166 // Adjust SVG attributes for the token. (This fixes the case of SVG attributes
4167 // that are not all lowercase.)
4168 //
4169 // Adjust foreign attributes for the token. (This fixes the use of namespaced
4170 // attributes, in particular XLink in SVG.)
4171 //
4172 // Insert a foreign element for the token, in the SVG namespace.
4173 //
4174 // If the token has its self-closing flag set, pop the current node off the
4175 // stack of open elements and acknowledge the token's self-closing flag.
4176 Token::StartTag {
4177 tag_name,
4178 is_self_closing,
4179 ..
4180 } if tag_name == "svg" => {
4181 let is_self_closing = *is_self_closing;
4182
4183 self.reconstruct_active_formatting_elements()?;
4184 self.insert_foreign_element(
4185 token_and_info,
4186 Namespace::SVG,
4187 Some(AdjustAttributes::Svg),
4188 )?;
4189
4190 if is_self_closing {
4191 self.open_elements_stack.pop();
4192
4193 token_and_info.acknowledged = true;
4194 }
4195 }
4196
4197 // A start tag whose tag name is one of: "caption", "col", "colgroup", "frame",
4198 // "head", "tbody", "td", "tfoot", "th", "thead", "tr"
4199 //
4200 // Parse error. Ignore the token.
4201 Token::StartTag { tag_name, .. }
4202 if matches!(
4203 &**tag_name,
4204 "caption"
4205 | "col"
4206 | "colgroup"
4207 | "frame"
4208 | "head"
4209 | "tbody"
4210 | "td"
4211 | "tfoot"
4212 | "th"
4213 | "thead"
4214 | "tr"
4215 ) =>
4216 {
4217 self.errors.push(Error::new(
4218 token_and_info.span,
4219 ErrorKind::StrayStartTag(tag_name.clone()),
4220 ));
4221 }
4222 // Any other start tag
4223 //
4224 // Reconstruct the active formatting elements, if any.
4225 //
4226 // Insert an HTML element for the token.
4227 Token::StartTag {
4228 is_self_closing,
4229 tag_name,
4230 ..
4231 } => {
4232 self.reconstruct_active_formatting_elements()?;
4233 self.insert_html_element(token_and_info)?;
4234 maybe_allow_self_closing!(is_self_closing, tag_name);
4235 }
4236 // Any other end tag
4237 Token::EndTag { .. } => {
4238 self.any_other_end_tag_for_in_body_insertion_mode(token_and_info);
4239 }
4240 }
4241
4242 // When the steps above say the user agent is to close a p
4243 // element, it means that the user agent must run the following
4244 // steps:
4245 //
4246 // Generate implied end tags, except for p elements.
4247 //
4248 // If the current node is not a p element, then this is a parse
4249 // error.
4250 //
4251 // Pop elements from the stack of open elements until a p
4252 // element has been popped from the stack.
4253 //
4254 // The adoption agency algorithm, which takes as its only
4255 // argument a token `token` for which the algorithm is being run,
4256 // consists of the following steps:
4257 //
4258 // Let subject be token's tag name.
4259 //
4260 // If the current node is an HTML element whose tag name is
4261 // subject, and the current node is not in the list of active
4262 // formatting elements, then pop the current node off the stack
4263 // of open elements and return.
4264 //
4265 // Let outer loop counter be 0.
4266 //
4267 // While true:
4268 //
4269 // If outer loop counter is greater than or equal to 8, then
4270 // return.
4271 //
4272 // Increment outer loop counter by 1.
4273 //
4274 // Let formatting element be the last element in the list of
4275 // active formatting elements that:
4276 //
4277 // - is between the end of the list and the last marker in the
4278 //   list, if any, or the start of the list otherwise, and
4279 // - has the tag name subject.
4280 // If there is no such element, then return and instead act as
4281 // described in the "any other end tag" entry above.
4282 //
4283 // If formatting element is not in the stack of open elements,
4284 // then this is a parse error; remove the element from the list,
4285 // and return.
4286 //
4287 // If formatting element is in the stack of open elements, but
4288 // the element is not in scope, then this is a parse error;
4289 // return.
4290 //
4291 // If formatting element is not the current node, this is a
4292 // parse error. (But do not return.)
4293 //
4294 // Let furthest block be the topmost node in the stack of open
4295 // elements that is lower in the stack than formatting element,
4296 // and is an element in the special category. There might not be
4297 // one.
4298 //
4299 // If there is no furthest block, then the UA must first pop all
4300 // the nodes from the bottom of the stack of open elements, from
4301 // the current node up to and including formatting element, then
4302 // remove formatting element from the list of active formatting
4303 // elements, and finally return.
4304 //
4305 // Let common ancestor be the element immediately above
4306 // formatting element in the stack of open elements.
4307 //
4308 // Let a bookmark note the position of formatting element in the
4309 // list of active formatting elements relative to the elements
4310 // on either side of it in the list.
4311 //
4312 // Let node and last node be furthest block.
4313 //
4314 // Let inner loop counter be 0.
4315 //
4316 // While true:
4317 //
4318 // Increment inner loop counter by 1.
4319 //
4320 // Let node be the element immediately above node in the stack
4321 // of open elements, or if node is no longer in the stack of
4322 // open elements (e.g. because it got removed by this
4323 // algorithm), the element that was immediately above node in
4324 // the stack of open elements before node was removed.
4325 //
4326 // If node is formatting element, then break.
4327 //
4328 // If inner loop counter is greater than 3 and node is in the
4329 // list of active formatting elements, then remove node from the
4330 // list of active formatting elements.
4331 //
4332 // If node is not in the list of active formatting elements,
4333 // then remove node from the stack of open elements and
4334 // continue.
4335 //
4336 // Create an element for the token for which the element node
4337 // was created, in the HTML namespace, with common ancestor as
4338 // the intended parent; replace the entry for node in the list
4339 // of active formatting elements with an entry for the new
4340 // element, replace the entry for node in the stack of open
4341 // elements with an entry for the new element, and let node be
4342 // the new element.
4343 //
4344 // If last node is furthest block, then move the aforementioned
4345 // bookmark to be immediately after the new node in the list of
4346 // active formatting elements.
4347 //
4348 // Append last node to node.
4349 //
4350 // Set last node to node.
4351 //
4352 // Insert whatever last node ended up being in the previous step
4353 // at the appropriate place for inserting a node, but using
4354 // common ancestor as the override target.
4355 //
4356 // Create an element for the token for which formatting element
4357 // was created, in the HTML namespace, with furthest block as
4358 // the intended parent.
4359 //
4360 // Take all of the child nodes of furthest block and append them
4361 // to the element created in the last step.
4362 //
4363 // Append that new element to furthest block.
4364 //
4365 // Remove formatting element from the list of active formatting
4366 // elements, and insert the new element into the list of active
4367 // formatting elements at the position of the aforementioned
4368 // bookmark.
4369 //
4370 // Remove formatting element from the stack of open elements,
4371 // and insert the new element into the stack of open elements
4372 // immediately below the position of furthest block in that
4373 // stack.
4374 }
4375 // The "text" insertion mode
4376 InsertionMode::Text => {
4377 // When the user agent is to apply the rules for the "text" insertion mode, the
4378 // user agent must handle the token as follows:
4379 match token {
4380 // A character token
4381 //
4382 // Insert the token's character.
4383 Token::Character { .. } => {
4384 self.insert_character(token_and_info)?;
4385 }
4386 // An end-of-file token
4387 //
4388 // Parse error.
4389 //
4390 // If the current node is a script element, mark the script element as "already
4391 // started".
4392 //
4393 // Pop the current node off the stack of open elements.
4394 //
4395 // Switch the insertion mode to the original insertion mode and reprocess the
4396 // token.
4397 Token::Eof => {
4398 self.errors
4399 .push(Error::new(token_and_info.span, ErrorKind::EofInText));
4400
4401 let popped = self.open_elements_stack.pop();
4402
4403 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
4404 self.insertion_mode = self.original_insertion_mode.clone();
4405 self.process_token(token_and_info, None)?;
4406 }
4407 // An end tag whose tag name is "script"
4408 //
4409 // If the active speculative HTML parser is null and the JavaScript execution
4410 // context stack is empty, then perform a microtask checkpoint.
4411 //
4412 // Let script be the current node (which will be a script element).
4413 //
4414 // Pop the current node off the stack of open elements.
4415 //
4416 // Switch the insertion mode to the original insertion mode.
4417 //
4418 // Let the old insertion point have the same value as the current insertion
4419 // point. Let the insertion point be just before the next input character.
4420 //
4421 // Increment the parser's script nesting level by one.
4422 //
4423 // If the active speculative HTML parser is null, then prepare the script. This
4424 // might cause some script to execute, which might cause new characters to be
4425 // inserted into the tokenizer, and might cause the tokenizer to output more
4426 // tokens, resulting in a reentrant invocation of the parser.
4427 //
4428 // Decrement the parser's script nesting level by one. If the parser's script
4429 // nesting level is zero, then set the parser pause flag to false.
4430 //
4431 // Let the insertion point have the value of the old insertion point. (In other
4432 // words, restore the insertion point to its previous value. This value might be
4433 // the "undefined" value.)
4434 //
4435 // At this stage, if there is a pending parsing-blocking script, then:
4436 //
4437 // If the script nesting level is not zero:
4438 // Set the parser pause flag to true, and abort the processing of any nested
4439 // invocations of the tokenizer, yielding control back to the caller.
4440 // (Tokenization will resume when the caller returns to the "outer" tree
4441 // construction stage.)
4442 //
4443 // The tree construction stage of this particular parser is being called
4444 // reentrantly, say from a call to document.write().
4445 //
4446 // Otherwise:
4447 // Run these steps:
4448 //
4449 // Let the script be the pending parsing-blocking script. There is no longer a
4450 // pending parsing-blocking script.
4451 //
4452 // Start the speculative HTML parser for this instance of the HTML parser.
4453 //
4454 // Block the tokenizer for this instance of the HTML parser, such that the event
4455 // loop will not run tasks that invoke the tokenizer.
4456 //
4457 // If the parser's Document has a style sheet that is blocking scripts or the
4458 // script's "ready to be parser-executed" flag is not set: spin the event loop
4459 // until the parser's Document has no style sheet that is blocking scripts and
4460 // the script's "ready to be parser-executed" flag is set.
4461 //
4462 // If this parser has been aborted in the meantime, return.
4463 //
4464 // This could happen if, e.g., while the spin the event loop algorithm is
4465 // running, the browsing context gets closed, or the document.open() method gets
4466 // invoked on the Document.
4467 //
4468 // Stop the speculative HTML parser for this instance of the HTML parser.
4469 //
4470 // Unblock the tokenizer for this instance of the HTML parser, such that tasks
4471 // that invoke the tokenizer can again be run.
4472 //
4473 // Let the insertion point be just before the next input character.
4474 //
4475 // Increment the parser's script nesting level by one (it should be zero before
4476 // this step, so this sets it to one).
4477 //
4478 // Execute the script.
4479 //
4480 // Decrement the parser's script nesting level by one. If the parser's script
4481 // nesting level is zero (which it always should be at this point), then set the
4482 // parser pause flag to false.
4483 //
4484 // Let the insertion point be undefined again.
4485 //
4486 // If there is once again a pending parsing-blocking script, then repeat these
4487 // steps from step 1.
4488 Token::EndTag { tag_name, .. } if tag_name == "script" => {
4489 // More things can be implemented to intercept script execution
4490 let popped = self.open_elements_stack.pop();
4491
4492 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
4493 self.insertion_mode = self.original_insertion_mode.clone();
4494 }
4495 // Any other end tag
4496 //
4497 // Pop the current node off the stack of open elements.
4498 //
4499 // Switch the insertion mode to the original insertion mode.
4500 _ => {
4501 if let Token::EndTag { .. } = token {
4502 self.update_end_tag_span(
4503 self.open_elements_stack.items.last(),
4504 token_and_info.span,
4505 );
4506 }
4507
4508 self.open_elements_stack.pop();
4509 self.insertion_mode = self.original_insertion_mode.clone();
4510 }
4511 }
4512 }
4513 // The "in table" insertion mode
4514 InsertionMode::InTable => {
4515 // When the user agent is to apply the rules for the "in table" insertion mode,
4516 // the user agent must handle the token as follows:
4517 match token {
4518 // A character token, if the current node is table, tbody, template, tfoot,
4519 // thead, or tr element
4520 //
4521 // Let the pending table character tokens be an empty list of tokens.
4522 //
4523 // Let the original insertion mode be the current insertion mode.
4524 //
4525 // Switch the insertion mode to "in table text" and reprocess the token.
4526 Token::Character { .. }
4527 if match self.open_elements_stack.items.last() {
4528 Some(node)
4529 if is_html_element!(
4530 node,
4531 "table" | "tbody" | "tfoot" | "thead" | "tr" | "template"
4532 ) =>
4533 {
4534 true
4535 }
4536 _ => false,
4537 } =>
4538 {
4539 self.pending_character_tokens.clear();
4540 self.original_insertion_mode = self.insertion_mode.clone();
4541 self.insertion_mode = InsertionMode::InTableText;
4542 self.process_token(token_and_info, None)?;
4543 }
4544 // A comment token
4545 //
4546 // Insert a comment.
4547 Token::Comment { .. } => {
4548 self.insert_comment(token_and_info)?;
4549 }
4550 // A DOCTYPE token
4551 //
4552 // Parse error. Ignore the token.
4553 Token::Doctype { .. } => {
4554 self.errors
4555 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
4556 }
4557 // A start tag whose tag name is "caption"
4558 //
4559 // Clear the stack back to a table context. (See below.)
4560 //
4561 // Insert a marker at the end of the list of active formatting elements.
4562 //
4563 // Insert an HTML element for the token, then switch the insertion mode to "in
4564 // caption".
4565 Token::StartTag {
4566 tag_name,
4567 is_self_closing,
4568 ..
4569 } if tag_name == "caption" => {
4570 self.open_elements_stack.clear_back_to_table_context();
4571 self.active_formatting_elements.insert_marker();
4572 self.insert_html_element(token_and_info)?;
4573 self.insertion_mode = InsertionMode::InCaption;
4574 maybe_allow_self_closing!(is_self_closing, tag_name);
4575 }
4576 // A start tag whose tag name is "colgroup"
4577 //
4578 // Clear the stack back to a table context. (See below.)
4579 //
4580 // Insert an HTML element for the token, then switch the insertion mode to "in
4581 // column group".
4582 Token::StartTag {
4583 tag_name,
4584 is_self_closing,
4585 ..
4586 } if tag_name == "colgroup" => {
4587 self.open_elements_stack.clear_back_to_table_context();
4588 self.insert_html_element(token_and_info)?;
4589 self.insertion_mode = InsertionMode::InColumnGroup;
4590 maybe_allow_self_closing!(is_self_closing, tag_name);
4591 }
4592 // A start tag whose tag name is "col"
4593 //
4594 // Clear the stack back to a table context. (See below.)
4595 //
4596 // Insert an HTML element for a "colgroup" start tag token with no attributes,
4597 // then switch the insertion mode to "in column group".
4598 //
4599 // Reprocess the current token.
4600 Token::StartTag { tag_name, .. } if tag_name == "col" => {
4601 self.open_elements_stack.clear_back_to_table_context();
4602 self.insert_html_element(
4603 &self.create_fake_token_and_info("colgroup", None),
4604 )?;
4605 self.insertion_mode = InsertionMode::InColumnGroup;
4606 self.process_token(token_and_info, None)?;
4607 }
4608 // A start tag whose tag name is one of: "tbody", "tfoot", "thead"
4609 //
4610 // Clear the stack back to a table context. (See below.)
4611 //
4612 // Insert an HTML element for the token, then switch the insertion mode to "in
4613 // table body".
4614 Token::StartTag {
4615 tag_name,
4616 is_self_closing,
4617 ..
4618 } if matches!(&**tag_name, "tbody" | "tfoot" | "thead") => {
4619 self.open_elements_stack.clear_back_to_table_context();
4620 self.insert_html_element(token_and_info)?;
4621 self.insertion_mode = InsertionMode::InTableBody;
4622 maybe_allow_self_closing!(is_self_closing, tag_name);
4623 }
4624 // A start tag whose tag name is one of: "td", "th", "tr"
4625 //
4626 // Clear the stack back to a table context. (See below.)
4627 //
4628 // Insert an HTML element for a "tbody" start tag token with no attributes, then
4629 // switch the insertion mode to "in table body".
4630 //
4631 // Reprocess the current token.
4632 Token::StartTag { tag_name, .. }
4633 if matches!(&**tag_name, "td" | "th" | "tr") =>
4634 {
4635 self.open_elements_stack.clear_back_to_table_context();
4636 self.insert_html_element(&self.create_fake_token_and_info("tbody", None))?;
4637 self.insertion_mode = InsertionMode::InTableBody;
4638 self.process_token(token_and_info, None)?;
4639 }
4640 // A start tag whose tag name is "table"
4641 //
4642 // Parse error.
4643 //
4644 // If the stack of open elements does not have a table element in table scope,
4645 // ignore the token.
4646 //
4647 // Otherwise:
4648 //
4649 // Pop elements from this stack until a table element has been popped from the
4650 // stack.
4651 //
4652 // Reset the insertion mode appropriately.
4653 //
4654 // Reprocess the token.
4655 Token::StartTag { tag_name, .. } if tag_name == "table" => {
4656 self.errors.push(Error::new(
4657 token_and_info.span,
4658 ErrorKind::TableSeenWhileTableOpen,
4659 ));
4660
4661 if !self.open_elements_stack.has_in_table_scope("table") {
4662 // Ignore
4663
4664 return Ok(());
4665 }
4666
4667 self.open_elements_stack
4668 .pop_until_tag_name_popped(&["table"]);
4669 self.reset_insertion_mode();
4670 self.process_token(token_and_info, None)?;
4671 }
4672 // An end tag whose tag name is "table"
4673 //
4674 // If the stack of open elements does not have a table element in table scope,
4675 // this is a parse error; ignore the token.
4676 //
4677 // Otherwise:
4678 //
4679 // Pop elements from this stack until a table element has been popped from the
4680 // stack.
4681 //
4682 // Reset the insertion mode appropriately.
4683 Token::EndTag { tag_name, .. } if tag_name == "table" => {
4684 if !self.open_elements_stack.has_in_table_scope("table") {
4685 self.errors.push(Error::new(
4686 token_and_info.span,
4687 ErrorKind::StrayEndTag(tag_name.clone()),
4688 ));
4689 } else {
4690 let popped = self
4691 .open_elements_stack
4692 .pop_until_tag_name_popped(&["table"]);
4693
4694 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
4695 self.reset_insertion_mode();
4696 }
4697 }
4698 // An end tag whose tag name is one of: "body", "caption", "col", "colgroup",
4699 // "html", "tbody", "td", "tfoot", "th", "thead", "tr"
4700 //
4701 // Parse error. Ignore the token.
4702 Token::EndTag { tag_name, .. }
4703 if matches!(
4704 &**tag_name,
4705 "body"
4706 | "caption"
4707 | "col"
4708 | "colgroup"
4709 | "html"
4710 | "tbody"
4711 | "td"
4712 | "tfoot"
4713 | "th"
4714 | "thead"
4715 | "tr"
4716 ) =>
4717 {
4718 self.errors.push(Error::new(
4719 token_and_info.span,
4720 ErrorKind::StrayEndTag(tag_name.clone()),
4721 ));
4722 }
4723 // A start tag whose tag name is one of: "style", "script", "template"
4724 //
4725 // An end tag whose tag name is "template"
4726 //
4727 // Process the token using the rules for the "in head" insertion mode.
4728 Token::StartTag { tag_name, .. }
4729 if matches!(&**tag_name, "style" | "script" | "template") =>
4730 {
4731 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
4732 }
4733 Token::EndTag { tag_name, .. } if tag_name == "template" => {
4734 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
4735 }
4736 // A start tag whose tag name is "input"
4737 //
4738 // If the token does not have an attribute with the name "type", or if it does,
4739 // but that attribute's value is not an ASCII case-insensitive match for the
4740 // string "hidden", then: act as described in the "anything else" entry below.
4741 //
4742 // Otherwise:
4743 //
4744 // Parse error.
4745 //
4746 // Insert an HTML element for the token.
4747 //
4748 // Pop that input element off the stack of open elements.
4749 //
4750 // Acknowledge the token's self-closing flag, if it is set.
4751 Token::StartTag {
4752 tag_name,
4753 attributes,
4754 is_self_closing,
4755 ..
4756 } if tag_name == "input" => {
4757 let is_self_closing = *is_self_closing;
4758 let input_type =
4759 attributes.iter().find(|attribute| attribute.name == "type");
4760 let is_hidden = match &input_type {
4761 Some(input_type) => match &input_type.value {
4762 Some(value) if value.as_ref().eq_ignore_ascii_case("hidden") => {
4763 true
4764 }
4765 _ => false,
4766 },
4767 _ => false,
4768 };
4769
4770 if input_type.is_none() || !is_hidden {
4771 self.process_token_in_table_insertion_mode_anything_else(
4772 token_and_info,
4773 )?;
4774 } else {
4775 self.errors.push(Error::new(
4776 token_and_info.span,
4777 ErrorKind::StartTagInTable(tag_name.clone()),
4778 ));
4779
4780 self.insert_html_element(token_and_info)?;
4781 self.open_elements_stack.pop();
4782
4783 if is_self_closing {
4784 token_and_info.acknowledged = true;
4785 }
4786 }
4787 }
4788 // A start tag whose tag name is "form"
4789 //
4790 // Parse error.
4791 //
4792 // If there is a template element on the stack of open elements, or if the form
4793 // element pointer is not null, ignore the token.
4794 //
4795 // Otherwise:
4796 //
4797 // Insert an HTML element for the token, and set the form element pointer to
4798 // point to the element created.
4799 //
4800 // Pop that form element off the stack of open elements.
4801 Token::StartTag { tag_name, .. } if tag_name == "form" => {
4802 self.errors.push(Error::new(
4803 token_and_info.span,
4804 ErrorKind::StartTagInTable(tag_name.clone()),
4805 ));
4806
4807 if self.open_elements_stack.contains_template_element()
4808 || self.form_element_pointer.is_some()
4809 {
4810 // Ignore
4811 return Ok(());
4812 }
4813
4814 let element = self.insert_html_element(token_and_info)?;
4815
4816 self.form_element_pointer = Some(element);
4817 self.open_elements_stack.pop();
4818 }
4819 // An end-of-file token
4820 //
4821 // Process the token using the rules for the "in body" insertion mode.
4822 Token::Eof => {
4823 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
4824 }
4825 // Anything else
4826 //
4827 // Parse error. Enable foster parenting, process the token using the rules for
4828 // the "in body" insertion mode, and then disable foster parenting.
4829 _ => {
4830 self.process_token_in_table_insertion_mode_anything_else(token_and_info)?;
4831 }
4832 }
4833 }
4834 // The "in table text" insertion mode
4835 InsertionMode::InTableText => {
4836 // When the user agent is to apply the rules for the "in table text" insertion
4837 // mode, the user agent must handle the token as follows:
4838 match token {
4839 // A character token that is U+0000 NULL
4840 //
4841 // Parse error. Ignore the token.
4842 Token::Character { value, .. } if *value == '\x00' => {
4843 self.errors.push(Error::new(
4844 token_and_info.span,
4845 ErrorKind::UnexpectedNullCharacter,
4846 ));
4847 }
4848 // Any other character token
4849 //
4850 // Append the character token to the pending table character tokens list.
4851 Token::Character { .. } => {
4852 self.pending_character_tokens.push(token_and_info.clone());
4853 }
4854 // Anything else
4855 //
4856 // If any of the tokens in the pending table character tokens list are character
4857 // tokens that are not ASCII whitespace, then this is a parse error: reprocess
4858 // the character tokens in the pending table character tokens list using the
4859 // rules given in the "anything else" entry in the "in table" insertion mode.
4860 //
4861 // Otherwise, insert the characters given by the pending table character tokens
4862 // list.
4863 //
4864 // Switch the insertion mode to the original insertion mode and reprocess the
4865 // token.
4866 _ => {
4867 let mut has_non_ascii_whitespace = false;
4868
4869 for character_token in &self.pending_character_tokens {
4870 match character_token.token {
4871 Token::Character { value, .. }
4872 if !matches!(
4873 value,
4874 '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20'
4875 ) =>
4876 {
4877 has_non_ascii_whitespace = true;
4878
4879 break;
4880 }
4881 _ => {}
4882 }
4883 }
4884
4885 if has_non_ascii_whitespace {
4886 for mut character_token in mem::take(&mut self.pending_character_tokens)
4887 {
4888 self.process_token_in_table_insertion_mode_anything_else(
4889 &mut character_token,
4890 )?;
4891 }
4892 } else {
4893 for mut character_token in mem::take(&mut self.pending_character_tokens)
4894 {
4895 self.insert_character(&mut character_token)?;
4896 }
4897 }
4898
4899 self.insertion_mode = self.original_insertion_mode.clone();
4900 self.process_token(token_and_info, None)?;
4901 }
4902 }
4903 }
4904 // The "in caption" insertion mode
4905 InsertionMode::InCaption => {
4906 match token {
4907 // An end tag whose tag name is "caption"
4908 //
4909 // If the stack of open elements does not have a caption element in table scope,
4910 // this is a parse error; ignore the token. (fragment case)
4911 //
4912 // Otherwise:
4913 //
4914 // Generate implied end tags.
4915 //
4916 // Now, if the current node is not a caption element, then this is a parse
4917 // error.
4918 //
4919 // Pop elements from this stack until a caption element has been popped from the
4920 // stack.
4921 //
4922 // Clear the list of active formatting elements up to the last marker.
4923 //
4924 // Switch the insertion mode to "in table".
4925 Token::EndTag { tag_name, .. } if tag_name == "caption" => {
4926 if !self.open_elements_stack.has_in_table_scope("caption") {
4927 self.errors.push(Error::new(
4928 token_and_info.span,
4929 ErrorKind::StrayEndTag(tag_name.clone()),
4930 ));
4931 } else {
4932 self.open_elements_stack.generate_implied_end_tags();
4933
4934 match self.open_elements_stack.items.last() {
4935 Some(node) if !is_html_element!(node, "caption") => {
4936 self.errors.push(Error::new(
4937 token_and_info.span,
4938 ErrorKind::UnclosedElements(tag_name.clone()),
4939 ));
4940 }
4941 _ => {}
4942 }
4943
4944 let popped = self
4945 .open_elements_stack
4946 .pop_until_tag_name_popped(&["caption"]);
4947
4948 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
4949 self.active_formatting_elements.clear_to_last_marker();
4950 self.insertion_mode = InsertionMode::InTable;
4951 }
4952 }
4953 // A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody",
4954 // "td", "tfoot", "th", "thead", "tr"
4955 //
4956 // An end tag whose tag name is "table"
4957 //
4958 // If the stack of open elements does not have a caption element in table scope,
4959 // this is a parse error; ignore the token. (fragment case)
4960 //
4961 // Otherwise:
4962 //
4963 // Generate implied end tags.
4964 //
4965 // Now, if the current node is not a caption element, then this is a parse
4966 // error.
4967 //
4968 // Pop elements from this stack until a caption element has been popped from the
4969 // stack.
4970 //
4971 // Clear the list of active formatting elements up to the last marker.
4972 //
4973 // Switch the insertion mode to "in table".
4974 //
4975 // Reprocess the token.
4976 Token::StartTag { tag_name, .. }
4977 if matches!(
4978 &**tag_name,
4979 "caption"
4980 | "col"
4981 | "colgroup"
4982 | "tbody"
4983 | "td"
4984 | "tfoot"
4985 | "th"
4986 | "thead"
4987 | "tr"
4988 ) =>
4989 {
4990 if !self.open_elements_stack.has_in_table_scope("caption") {
4991 self.errors.push(Error::new(
4992 token_and_info.span,
4993 ErrorKind::StrayStartTag(tag_name.clone()),
4994 ));
4995 } else {
4996 self.open_elements_stack.generate_implied_end_tags();
4997
4998 match self.open_elements_stack.items.last() {
4999 Some(node) if !is_html_element!(node, "caption") => {
5000 self.errors.push(Error::new(
5001 token_and_info.span,
5002 ErrorKind::UnclosedElementsOnStack,
5003 ));
5004 }
5005 _ => {}
5006 }
5007
5008 self.open_elements_stack
5009 .pop_until_tag_name_popped(&["caption"]);
5010 self.active_formatting_elements.clear_to_last_marker();
5011 self.insertion_mode = InsertionMode::InTable;
5012 self.process_token(token_and_info, None)?;
5013 }
5014 }
5015 Token::EndTag { tag_name, .. } if tag_name == "table" => {
5016 if !self.open_elements_stack.has_in_table_scope("caption") {
5017 self.errors.push(Error::new(
5018 token_and_info.span,
5019 ErrorKind::StrayEndTag(tag_name.clone()),
5020 ));
5021 } else {
5022 self.open_elements_stack.generate_implied_end_tags();
5023
5024 match self.open_elements_stack.items.last() {
5025 Some(node) if !is_html_element!(node, "caption") => {
5026 self.errors.push(Error::new(
5027 token_and_info.span,
5028 ErrorKind::UnclosedElementsOnStack,
5029 ));
5030 }
5031 _ => {}
5032 }
5033
5034 self.open_elements_stack
5035 .pop_until_tag_name_popped(&["caption"]);
5036 self.active_formatting_elements.clear_to_last_marker();
5037 self.insertion_mode = InsertionMode::InTable;
5038 self.process_token(token_and_info, None)?;
5039 }
5040 }
5041 // An end tag whose tag name is one of: "body", "col", "colgroup", "html",
5042 // "tbody", "td", "tfoot", "th", "thead", "tr"
5043 //
5044 // Parse error. Ignore the token.
5045 Token::EndTag { tag_name, .. }
5046 if matches!(
5047 &**tag_name,
5048 "body"
5049 | "col"
5050 | "colgroup"
5051 | "html"
5052 | "tbody"
5053 | "td"
5054 | "tfoot"
5055 | "th"
5056 | "thead"
5057 | "tr"
5058 ) =>
5059 {
5060 self.errors.push(Error::new(
5061 token_and_info.span,
5062 ErrorKind::StrayEndTag(tag_name.clone()),
5063 ));
5064 }
5065 // Anything else
5066 //
5067 // Process the token using the rules for the "in body" insertion mode.
5068 _ => {
5069 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
5070 }
5071 }
5072 }
5073 // The "in column group" insertion mode
5074 InsertionMode::InColumnGroup => {
5075 // When the user agent is to apply the rules for the "in column group" insertion
5076 // mode, the user agent must handle the token as follows:
5077 match token {
5078 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
5079 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
5080 // SPACE
5081 //
5082 // Insert the character.
5083 Token::Character {
5084 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
5085 ..
5086 } => {
5087 self.insert_character(token_and_info)?;
5088 }
5089 // A comment token
5090 //
5091 // Insert a comment.
5092 Token::Comment { .. } => {
5093 self.insert_comment(token_and_info)?;
5094 }
5095 // A DOCTYPE token
5096 //
5097 // Parse error. Ignore the token.
5098 Token::Doctype { .. } => {
5099 self.errors
5100 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
5101 }
5102 // A start tag whose tag name is "html"
5103 //
5104 // Process the token using the rules for the "in body" insertion mode.
5105 Token::StartTag { tag_name, .. } if tag_name == "html" => {
5106 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
5107 }
5108 // A start tag whose tag name is "col"
5109 //
5110 // Insert an HTML element for the token. Immediately pop the current node off
5111 // the stack of open elements.
5112 //
5113 // Acknowledge the token's self-closing flag, if it is set.
5114 Token::StartTag {
5115 tag_name,
5116 is_self_closing,
5117 ..
5118 } if tag_name == "col" => {
5119 let is_self_closing = *is_self_closing;
5120
5121 self.insert_html_element(token_and_info)?;
5122 self.open_elements_stack.pop();
5123
5124 if is_self_closing {
5125 token_and_info.acknowledged = true;
5126 }
5127 }
5128 // An end tag whose tag name is "colgroup"
5129 //
5130 // If the current node is not a colgroup element, then this is a parse error;
5131 // ignore the token.
5132 //
5133 // Otherwise, pop the current node from the stack of open elements. Switch the
5134 // insertion mode to "in table".
5135 Token::EndTag { tag_name, .. } if tag_name == "colgroup" => {
5136 match self.open_elements_stack.items.last() {
5137 Some(node) if !is_html_element!(node, "colgroup") => {
5138 self.errors.push(Error::new(
5139 token_and_info.span,
5140 ErrorKind::UnclosedElements(tag_name.clone()),
5141 ));
5142 }
5143 _ => {
5144 let popped = self.open_elements_stack.pop();
5145
5146 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
5147 self.insertion_mode = InsertionMode::InTable;
5148 }
5149 }
5150 }
5151 // An end tag whose tag name is "col"
5152 //
5153 // Parse error. Ignore the token.
5154 Token::EndTag { tag_name, .. } if tag_name == "col" => {
5155 self.errors.push(Error::new(
5156 token_and_info.span,
5157 ErrorKind::StrayEndTag(tag_name.clone()),
5158 ));
5159 }
5160 // A start tag whose tag name is "template"
5161 //
5162 // An end tag whose tag name is "template"
5163 //
5164 // Process the token using the rules for the "in head" insertion mode.
5165 Token::StartTag { tag_name, .. } if tag_name == "template" => {
5166 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
5167 }
5168 Token::EndTag { tag_name, .. } if tag_name == "template" => {
5169 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
5170 }
5171 // An end-of-file token
5172 // Process the token using the rules for the "in body" insertion mode.
5173 Token::Eof => {
5174 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
5175 }
5176 // Anything else
5177 //
5178 // If the current node is not a colgroup element, then this is a parse error;
5179 // ignore the token.
5180 //
5181 // Otherwise, pop the current node from the stack of open elements.
5182 //
5183 // Switch the insertion mode to "in table".
5184 //
5185 // Reprocess the token.
5186 _ => match self.open_elements_stack.items.last() {
5187 Some(node) if !is_html_element!(node, "colgroup") => match token {
5188 Token::Character { .. } => {
5189 self.errors.push(Error::new(
5190 token_and_info.span,
5191 ErrorKind::NonSpaceCharacterInColumnGroup,
5192 ));
5193 }
5194 _ => {
5195 self.errors.push(Error::new(
5196 token_and_info.span,
5197 ErrorKind::GarbageInColumnGroup,
5198 ));
5199 }
5200 },
5201 _ => {
5202 self.open_elements_stack.pop();
5203 self.insertion_mode = InsertionMode::InTable;
5204 self.process_token(token_and_info, None)?;
5205 }
5206 },
5207 }
5208 }
5209 // The "in table body" insertion mode
5210 InsertionMode::InTableBody => {
5211 // When the user agent is to apply the rules for the "in table body" insertion
5212 // mode, the user agent must handle the token as follows:
5213 match token {
5214 // A start tag whose tag name is "tr"
5215 //
5216 // Clear the stack back to a table body context. (See below.)
5217 //
5218 // Insert an HTML element for the token, then switch the insertion mode to "in
5219 // row".
5220 Token::StartTag {
5221 tag_name,
5222 is_self_closing,
5223 ..
5224 } if tag_name == "tr" => {
5225 self.open_elements_stack.clear_back_to_table_body_context();
5226 self.insert_html_element(token_and_info)?;
5227 self.insertion_mode = InsertionMode::InRow;
5228 maybe_allow_self_closing!(is_self_closing, tag_name);
5229 }
5230 // A start tag whose tag name is one of: "th", "td"
5231 //
5232 // Parse error.
5233 //
5234 // Clear the stack back to a table body context. (See below.)
5235 //
5236 // Insert an HTML element for a "tr" start tag token with no attributes, then
5237 // switch the insertion mode to "in row".
5238 //
5239 // Reprocess the current token.
5240 Token::StartTag { tag_name, .. } if matches!(&**tag_name, "th" | "td") => {
5241 self.errors.push(Error::new(
5242 token_and_info.span,
5243 ErrorKind::StartTagInTableBody(tag_name.clone()),
5244 ));
5245 self.open_elements_stack.clear_back_to_table_body_context();
5246 self.insert_html_element(&self.create_fake_token_and_info("tr", None))?;
5247 self.insertion_mode = InsertionMode::InRow;
5248 self.process_token(token_and_info, None)?;
5249 }
5250 // An end tag whose tag name is one of: "tbody", "tfoot", "thead"
5251 //
5252 // If the stack of open elements does not have an element in table scope that is
5253 // an HTML element with the same tag name as the token, this is a parse error;
5254 // ignore the token.
5255 //
5256 // Otherwise:
5257 //
5258 // Clear the stack back to a table body context. (See below.)
5259 //
5260 // Pop the current node from the stack of open elements. Switch the insertion
5261 // mode to "in table".
5262 Token::EndTag { tag_name, .. }
5263 if matches!(&**tag_name, "tbody" | "tfoot" | "thead") =>
5264 {
5265 if !self.open_elements_stack.has_in_table_scope(tag_name) {
5266 self.errors.push(Error::new(
5267 token_and_info.span,
5268 ErrorKind::StrayEndTag(tag_name.clone()),
5269 ));
5270 } else {
5271 self.open_elements_stack.clear_back_to_table_body_context();
5272 self.update_end_tag_span(
5273 self.open_elements_stack.items.last(),
5274 token_and_info.span,
5275 );
5276 self.open_elements_stack.pop();
5277 self.insertion_mode = InsertionMode::InTable;
5278 }
5279 }
5280 // A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody",
5281 // "tfoot", "thead"
5282 //
5283 // An end tag whose tag name is "table"
5284 //
5285 // If the stack of open elements does not have a tbody, thead, or tfoot element
5286 // in table scope, this is a parse error; ignore the token.
5287 //
5288 // Otherwise:
5289 //
5290 // Clear the stack back to a table body context. (See below.)
5291 //
5292 // Pop the current node from the stack of open elements. Switch the insertion
5293 // mode to "in table".
5294 //
5295 // Reprocess the token.
5296 Token::StartTag { tag_name, .. }
5297 if matches!(
5298 &**tag_name,
5299 "caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead"
5300 ) =>
5301 {
5302 if !(self.open_elements_stack.has_in_table_scope("tbody")
5303 || self.open_elements_stack.has_in_table_scope("thead")
5304 || self.open_elements_stack.has_in_table_scope("tfoot"))
5305 {
5306 self.errors.push(Error::new(
5307 token_and_info.span,
5308 ErrorKind::StrayStartTag(tag_name.clone()),
5309 ));
5310 } else {
5311 self.open_elements_stack.clear_back_to_table_body_context();
5312 self.open_elements_stack.pop();
5313 self.insertion_mode = InsertionMode::InTable;
5314 self.process_token(token_and_info, None)?;
5315 }
5316 }
5317 Token::EndTag { tag_name, .. } if tag_name == "table" => {
5318 if !(self.open_elements_stack.has_in_table_scope("tbody")
5319 || self.open_elements_stack.has_in_table_scope("thead")
5320 || self.open_elements_stack.has_in_table_scope("tfoot"))
5321 {
5322 self.errors.push(Error::new(
5323 token_and_info.span,
5324 ErrorKind::StrayEndTag(tag_name.clone()),
5325 ));
5326 } else {
5327 self.open_elements_stack.clear_back_to_table_body_context();
5328 self.open_elements_stack.pop();
5329 self.insertion_mode = InsertionMode::InTable;
5330 self.process_token(token_and_info, None)?;
5331 }
5332 }
5333 // An end tag whose tag name is one of: "body", "caption", "col", "colgroup",
5334 // "html", "td", "th", "tr"
5335 //
5336 // Parse error. Ignore the token.
5337 Token::EndTag { tag_name, .. }
5338 if matches!(
5339 &**tag_name,
5340 "body" | "caption" | "col" | "colgroup" | "html" | "td" | "th" | "tr"
5341 ) =>
5342 {
5343 self.errors.push(Error::new(
5344 token_and_info.span,
5345 ErrorKind::StrayEndTag(tag_name.clone()),
5346 ));
5347 }
5348 // Anything else
5349 //
5350 // Process the token using the rules for the "in table" insertion mode.
5351 _ => {
5352 self.process_token_using_rules(token_and_info, InsertionMode::InTable)?;
5353 }
5354 }
5355 }
5356 // The "in row" insertion mode
5357 InsertionMode::InRow => {
5358 // When the user agent is to apply the rules for the "in row" insertion mode,
5359 // the user agent must handle the token as follows:
5360 match token {
5361 // A start tag whose tag name is one of: "th", "td"
5362 //
5363 // Clear the stack back to a table row context. (See below.)
5364 //
5365 // Insert an HTML element for the token, then switch the insertion mode to "in
5366 // cell".
5367 //
5368 // Insert a marker at the end of the list of active formatting elements.
5369 Token::StartTag {
5370 tag_name,
5371 is_self_closing,
5372 ..
5373 } if matches!(&**tag_name, "th" | "td") => {
5374 self.open_elements_stack.clear_back_to_table_row_context();
5375 self.insert_html_element(token_and_info)?;
5376 self.insertion_mode = InsertionMode::InCell;
5377 self.active_formatting_elements.insert_marker();
5378 maybe_allow_self_closing!(is_self_closing, tag_name);
5379 }
5380 // An end tag whose tag name is "tr"
5381 //
5382 // If the stack of open elements does not have a tr element in table scope, this
5383 // is a parse error; ignore the token.
5384 //
5385 // Otherwise:
5386 //
5387 // Clear the stack back to a table row context. (See below.)
5388 //
5389 // Pop the current node (which will be a tr element) from the stack of open
5390 // elements. Switch the insertion mode to "in table body".
5391 Token::EndTag { tag_name, .. } if tag_name == "tr" => {
5392 if !self.open_elements_stack.has_in_table_scope("tr") {
5393 self.errors.push(Error::new(
5394 token_and_info.span,
5395 ErrorKind::NoTableRowToClose,
5396 ));
5397 } else {
5398 self.open_elements_stack.clear_back_to_table_row_context();
5399 self.update_end_tag_span(
5400 self.open_elements_stack.items.last(),
5401 token_and_info.span,
5402 );
5403 self.open_elements_stack.pop();
5404 self.insertion_mode = InsertionMode::InTableBody;
5405 }
5406 }
5407 // A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody",
5408 // "tfoot", "thead", "tr"
5409 //
5410 // An end tag whose tag name is "table"
5411 //
5412 // If the stack of open elements does not have a tr element in table scope, this
5413 // is a parse error; ignore the token.
5414 //
5415 // Otherwise:
5416 //
5417 // Clear the stack back to a table row context. (See below.)
5418 //
5419 // Pop the current node (which will be a tr element) from the stack of open
5420 // elements. Switch the insertion mode to "in table body".
5421 //
5422 // Reprocess the token.
5423 Token::StartTag { tag_name, .. }
5424 if matches!(
5425 &**tag_name,
5426 "caption" | "col" | "colgroup" | "tbody" | "tfoot" | "thead" | "tr"
5427 ) =>
5428 {
5429 if !self.open_elements_stack.has_in_table_scope("tr") {
5430 self.errors.push(Error::new(
5431 token_and_info.span,
5432 ErrorKind::NoTableRowToClose,
5433 ));
5434 } else {
5435 self.open_elements_stack.clear_back_to_table_row_context();
5436 self.open_elements_stack.pop();
5437 self.insertion_mode = InsertionMode::InTableBody;
5438 self.process_token(token_and_info, None)?;
5439 }
5440 }
5441 Token::EndTag { tag_name, .. } if tag_name == "table" => {
5442 if !self.open_elements_stack.has_in_table_scope("tr") {
5443 self.errors.push(Error::new(
5444 token_and_info.span,
5445 ErrorKind::NoTableRowToClose,
5446 ));
5447 } else {
5448 self.open_elements_stack.clear_back_to_table_row_context();
5449 self.open_elements_stack.pop();
5450 self.insertion_mode = InsertionMode::InTableBody;
5451 self.process_token(token_and_info, None)?;
5452 }
5453 }
5454 // An end tag whose tag name is one of: "tbody", "tfoot", "thead"
5455 //
5456 // If the stack of open elements does not have an element in table scope that is
5457 // an HTML element with the same tag name as the token, this is a parse error;
5458 // ignore the token.
5459 //
5460 // If the stack of open elements does not have a tr element in table scope,
5461 // ignore the token.
5462 //
5463 // Otherwise:
5464 //
5465 // Clear the stack back to a table row context. (See below.)
5466 //
5467 // Pop the current node (which will be a tr element) from the stack of open
5468 // elements. Switch the insertion mode to "in table body".
5469 //
5470 // Reprocess the token.
5471 Token::EndTag { tag_name, .. }
5472 if matches!(&**tag_name, "tbody" | "tfoot" | "thead") =>
5473 {
5474 if !self.open_elements_stack.has_in_table_scope(tag_name) {
5475 self.errors.push(Error::new(
5476 token_and_info.span,
5477 ErrorKind::StrayEndTag(tag_name.clone()),
5478 ));
5479 } else if !self.open_elements_stack.has_in_table_scope("tr") {
5480 // Ignore
5481
5482 return Ok(());
5483 } else {
5484 self.open_elements_stack.clear_back_to_table_row_context();
5485 self.open_elements_stack.pop();
5486 self.insertion_mode = InsertionMode::InTableBody;
5487 self.process_token(token_and_info, None)?;
5488 }
5489 }
5490 // An end tag whose tag name is one of: "body", "caption", "col", "colgroup",
5491 // "html", "td", "th"
5492 //
5493 // Parse error. Ignore the token.
5494 Token::EndTag { tag_name, .. }
5495 if matches!(
5496 &**tag_name,
5497 "body" | "caption" | "col" | "colgroup" | "html" | "td" | "th"
5498 ) =>
5499 {
5500 self.errors.push(Error::new(
5501 token_and_info.span,
5502 ErrorKind::StrayEndTag(tag_name.clone()),
5503 ));
5504 }
5505 // Anything else
5506 //
5507 // Process the token using the rules for the "in table" insertion mode.
5508 _ => {
5509 self.process_token_using_rules(token_and_info, InsertionMode::InTable)?;
5510 }
5511 }
5512 }
5513 // The "in cell" insertion mode
5514 InsertionMode::InCell => {
5515 // When the user agent is to apply the rules for the "in cell" insertion mode,
5516 // the user agent must handle the token as follows:
5517 match token {
5518 // An end tag whose tag name is one of: "td", "th"
5519 //
5520 // If the stack of open elements does not have an element in table scope that is
5521 // an HTML element with the same tag name as that of the token, then this is a
5522 // parse error; ignore the token.
5523 //
5524 // Otherwise:
5525 //
5526 // Generate implied end tags.
5527 //
5528 // Now, if the current node is not an HTML element with the same tag name as the
5529 // token, then this is a parse error.
5530 //
5531 // Pop elements from the stack of open elements stack until an HTML element with
5532 // the same tag name as the token has been popped from the stack.
5533 //
5534 // Clear the list of active formatting elements up to the last marker.
5535 //
5536 // Switch the insertion mode to "in row".
5537 Token::EndTag { tag_name, .. } if matches!(&**tag_name, "td" | "th") => {
5538 if !self.open_elements_stack.has_in_table_scope(tag_name) {
5539 self.errors.push(Error::new(
5540 token_and_info.span,
5541 ErrorKind::StrayEndTag(tag_name.clone()),
5542 ));
5543 } else {
5544 self.open_elements_stack.generate_implied_end_tags();
5545
5546 match self.open_elements_stack.items.last() {
5547 Some(node) if !is_html_element_with_tag_name!(node, tag_name) => {
5548 self.errors.push(Error::new(
5549 token_and_info.span,
5550 ErrorKind::UnclosedElements(tag_name.clone()),
5551 ));
5552 }
5553 _ => {}
5554 }
5555
5556 let popped = self
5557 .open_elements_stack
5558 .pop_until_tag_name_popped(&[tag_name]);
5559
5560 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
5561 self.active_formatting_elements.clear_to_last_marker();
5562 self.insertion_mode = InsertionMode::InRow;
5563 }
5564 }
5565 // A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody",
5566 // "td", "tfoot", "th", "thead", "tr"
5567 //
5568 // If the stack of open elements does not have a td or th element in table
5569 // scope, then this is a parse error; ignore the token. (fragment case)
5570 //
5571 // Otherwise, close the cell (see below) and reprocess the token.
5572 Token::StartTag { tag_name, .. }
5573 if matches!(
5574 &**tag_name,
5575 "caption"
5576 | "col"
5577 | "colgroup"
5578 | "tbody"
5579 | "td"
5580 | "tfoot"
5581 | "th"
5582 | "thead"
5583 | "tr"
5584 ) =>
5585 {
5586 if !self.open_elements_stack.has_in_table_scope("td")
5587 && !self.open_elements_stack.has_in_table_scope("th")
5588 {
5589 self.errors
5590 .push(Error::new(token_and_info.span, ErrorKind::NoCellToClose));
5591 } else {
5592 self.close_the_cell();
5593 self.process_token(token_and_info, None)?;
5594 }
5595 }
5596 // An end tag whose tag name is one of: "body", "caption", "col", "colgroup",
5597 // "html"
5598 //
5599 // Parse error. Ignore the token.
5600 Token::EndTag { tag_name, .. }
5601 if matches!(
5602 &**tag_name,
5603 "body" | "caption" | "col" | "colgroup" | "html"
5604 ) =>
5605 {
5606 self.errors.push(Error::new(
5607 token_and_info.span,
5608 ErrorKind::StrayEndTag(tag_name.clone()),
5609 ));
5610 }
5611 // An end tag whose tag name is one of: "table", "tbody", "tfoot", "thead", "tr"
5612 //
5613 // If the stack of open elements does not have an element in table scope that is
5614 // an HTML element with the same tag name as that of the token, then this is a
5615 // parse error; ignore the token.
5616 //
5617 // Otherwise, close the cell (see below) and reprocess the token.
5618 Token::EndTag { tag_name, .. }
5619 if matches!(&**tag_name, "table" | "tbody" | "tfoot" | "thead" | "tr") =>
5620 {
5621 if !self.open_elements_stack.has_in_table_scope(tag_name) {
5622 self.errors.push(Error::new(
5623 token_and_info.span,
5624 ErrorKind::StrayEndTag(tag_name.clone()),
5625 ))
5626 } else {
5627 self.close_the_cell();
5628 self.process_token(token_and_info, None)?;
5629 }
5630 }
5631 // Anything else
5632 //
5633 // Process the token using the rules for the "in body" insertion mode.
5634 _ => {
5635 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
5636 }
5637 }
5638
5639 // Where the steps above say to close the cell, they mean to run
5640 // the following algorithm:
5641 //
5642 // Generate implied end tags.
5643 //
5644 // If the current node is not now a td element or a th element,
5645 // then this is a parse error.
5646 //
5647 // Pop elements from the stack of open elements stack until a td
5648 // element or a th element has been popped from the stack.
5649 //
5650 // Clear the list of active formatting elements up to the last
5651 // marker.
5652 //
5653 // Switch the insertion mode to "in row".
5654 }
5655 // The "in select" insertion mode
5656 InsertionMode::InSelect => {
5657 match token {
5658 // A character token that is U+0000 NULL
5659 //
5660 // Parse error. Ignore the token.
5661 Token::Character { value, .. } if *value == '\x00' => self.errors.push(
5662 Error::new(token_and_info.span, ErrorKind::UnexpectedNullCharacter),
5663 ),
5664 // Any other character token
5665 //
5666 // Insert the token's character.
5667 Token::Character { .. } => {
5668 self.insert_character(token_and_info)?;
5669 }
5670 // A comment token
5671 //
5672 // Insert a comment.
5673 Token::Comment { .. } => {
5674 self.insert_comment(token_and_info)?;
5675 }
5676 // A DOCTYPE token
5677 //
5678 // Parse error. Ignore the token.
5679 Token::Doctype { .. } => {
5680 self.errors
5681 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
5682 }
5683 // A start tag whose tag name is "html"
5684 //
5685 // Process the token using the rules for the "in body" insertion mode.
5686 Token::StartTag { tag_name, .. } if tag_name == "html" => {
5687 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
5688 }
5689 // A start tag whose tag name is "option"
5690 //
5691 // If the current node is an option element, pop that node from the stack of
5692 // open elements.
5693 //
5694 // Insert an HTML element for the token.
5695 Token::StartTag {
5696 tag_name,
5697 is_self_closing,
5698 ..
5699 } if tag_name == "option" => {
5700 match self.open_elements_stack.items.last() {
5701 Some(node) if is_html_element!(node, "option") => {
5702 self.open_elements_stack.pop();
5703 }
5704 _ => {}
5705 }
5706
5707 self.insert_html_element(token_and_info)?;
5708 maybe_allow_self_closing!(is_self_closing, tag_name);
5709 }
5710 // A start tag whose tag name is "optgroup"
5711 //
5712 // If the current node is an option element, pop that node from the stack of
5713 // open elements.
5714 //
5715 // If the current node is an optgroup element, pop that node from the stack of
5716 // open elements.
5717 //
5718 // Insert an HTML element for the token.
5719 Token::StartTag {
5720 tag_name,
5721 is_self_closing,
5722 ..
5723 } if tag_name == "optgroup" => {
5724 match self.open_elements_stack.items.last() {
5725 Some(node) if is_html_element!(node, "option") => {
5726 self.open_elements_stack.pop();
5727 }
5728 _ => {}
5729 }
5730
5731 match self.open_elements_stack.items.last() {
5732 Some(node) if is_html_element!(node, "optgroup") => {
5733 self.open_elements_stack.pop();
5734 }
5735 _ => {}
5736 }
5737
5738 self.insert_html_element(token_and_info)?;
5739 maybe_allow_self_closing!(is_self_closing, tag_name);
5740 }
5741 // An end tag whose tag name is "optgroup"
5742 //
5743 // First, if the current node is an option element, and the node immediately
5744 // before it in the stack of open elements is an optgroup element, then pop the
5745 // current node from the stack of open elements.
5746 //
5747 // If the current node is an optgroup element, then pop that node from the stack
5748 // of open elements. Otherwise, this is a parse error; ignore the token.
5749 Token::EndTag { tag_name, .. } if tag_name == "optgroup" => {
5750 match self.open_elements_stack.items.last() {
5751 Some(node) if is_html_element!(node, "option") => {
5752 match self
5753 .open_elements_stack
5754 .items
5755 // `-1` is `current node`, because `The current node is the
5756 // bottommost node in this stack of open elements.`
5757 // `-2` is node immediately before it in the stack of open
5758 // elements
5759 .get(self.open_elements_stack.items.len() - 2)
5760 {
5761 Some(node) if is_html_element!(node, "optgroup") => {
5762 let popped = self.open_elements_stack.pop();
5763
5764 self.update_end_tag_span(
5765 popped.as_ref(),
5766 token_and_info.span,
5767 );
5768 }
5769 _ => {}
5770 }
5771 }
5772 _ => {}
5773 }
5774
5775 match self.open_elements_stack.items.last() {
5776 Some(node) if is_html_element!(node, "optgroup") => {
5777 let popped = self.open_elements_stack.pop();
5778
5779 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
5780 }
5781 _ => self.errors.push(Error::new(
5782 token_and_info.span,
5783 ErrorKind::StrayEndTag(tag_name.clone()),
5784 )),
5785 }
5786 }
5787 // An end tag whose tag name is "option"
5788 //
5789 // If the current node is an option element, then pop that node from the stack
5790 // of open elements. Otherwise, this is a parse error; ignore the token.
5791 Token::EndTag { tag_name, .. } if tag_name == "option" => {
5792 match self.open_elements_stack.items.last() {
5793 Some(node) if is_html_element!(node, "option") => {
5794 let popped = self.open_elements_stack.pop();
5795
5796 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
5797 }
5798 _ => self.errors.push(Error::new(
5799 token_and_info.span,
5800 ErrorKind::StrayEndTag(tag_name.clone()),
5801 )),
5802 }
5803 }
5804 // An end tag whose tag name is "select"
5805 //
5806 // If the stack of open elements does not have a select element in select scope,
5807 // this is a parse error; ignore the token. (fragment case)
5808 //
5809 // Otherwise:
5810 //
5811 // Pop elements from the stack of open elements until a select element has been
5812 // popped from the stack.
5813 //
5814 // Reset the insertion mode appropriately.
5815 Token::EndTag { tag_name, .. } if tag_name == "select" => {
5816 if !self.open_elements_stack.has_in_select_scope("select") {
5817 self.errors.push(Error::new(
5818 token_and_info.span,
5819 ErrorKind::StrayEndTag(tag_name.clone()),
5820 ));
5821 } else {
5822 let popped = self
5823 .open_elements_stack
5824 .pop_until_tag_name_popped(&["select"]);
5825
5826 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
5827 self.reset_insertion_mode();
5828 }
5829 }
5830 // A start tag whose tag name is "select"
5831 //
5832 // Parse error.
5833 //
5834 // If the stack of open elements does not have a select element in select scope,
5835 // ignore the token. (fragment case)
5836 //
5837 // Otherwise:
5838 //
5839 // Pop elements from the stack of open elements until a select element has been
5840 // popped from the stack.
5841 //
5842 // Reset the insertion mode appropriately.
5843 Token::StartTag { tag_name, .. } if tag_name == "select" => {
5844 self.errors.push(Error::new(
5845 token_and_info.span,
5846 ErrorKind::StartSelectWhereEndSelectExpected,
5847 ));
5848
5849 if !self.open_elements_stack.has_in_select_scope("select") {
5850 // Ignore
5851
5852 return Ok(());
5853 }
5854
5855 self.open_elements_stack
5856 .pop_until_tag_name_popped(&["select"]);
5857 self.reset_insertion_mode();
5858 }
5859 // A start tag whose tag name is one of: "input", "keygen", "textarea"
5860 //
5861 // Parse error.
5862 //
5863 // If the stack of open elements does not have a select element in select scope,
5864 // ignore the token. (fragment case)
5865 //
5866 // Otherwise:
5867 //
5868 // Pop elements from the stack of open elements until a select element has been
5869 // popped from the stack.
5870 //
5871 // Reset the insertion mode appropriately.
5872 //
5873 // Reprocess the token.
5874 Token::StartTag { tag_name, .. }
5875 if matches!(&**tag_name, "input" | "keygen" | "textarea") =>
5876 {
5877 self.errors.push(Error::new(
5878 token_and_info.span,
5879 ErrorKind::StartTagWithSelectOpen(tag_name.clone()),
5880 ));
5881
5882 if !self.open_elements_stack.has_in_select_scope("select") {
5883 // Ignore
5884 return Ok(());
5885 }
5886
5887 self.open_elements_stack
5888 .pop_until_tag_name_popped(&["select"]);
5889 self.reset_insertion_mode();
5890 self.process_token(token_and_info, None)?;
5891 }
5892 // A start tag whose tag name is one of: "script", "template"
5893 //
5894 // An end tag whose tag name is "template"
5895 //
5896 // Process the token using the rules for the "in head" insertion mode.
5897 Token::StartTag { tag_name, .. }
5898 if matches!(&**tag_name, "script" | "template") =>
5899 {
5900 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
5901 }
5902 Token::EndTag { tag_name, .. } if tag_name == "template" => {
5903 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
5904 }
5905 // An end-of-file token
5906 //
5907 // Process the token using the rules for the "in body" insertion mode.
5908 Token::Eof => {
5909 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
5910 }
5911 // Anything else
5912 //
5913 // Parse error. Ignore the token.
5914 _ => match token {
5915 Token::StartTag { tag_name, .. } => {
5916 self.errors.push(Error::new(
5917 token_and_info.span,
5918 ErrorKind::StrayStartTag(tag_name.clone()),
5919 ));
5920 }
5921 Token::EndTag { tag_name, .. } => {
5922 self.errors.push(Error::new(
5923 token_and_info.span,
5924 ErrorKind::StrayEndTag(tag_name.clone()),
5925 ));
5926 }
5927 _ => {
5928 unreachable!()
5929 }
5930 },
5931 }
5932 }
5933 // The "in select in table" insertion mode
5934 InsertionMode::InSelectInTable => {
5935 // When the user agent is to apply the rules for the "in select in table"
5936 // insertion mode, the user agent must handle the token as follows:
5937 match token {
5938 // A start tag whose tag name is one of: "caption", "table", "tbody", "tfoot",
5939 // "thead", "tr", "td", "th"
5940 //
5941 // Parse error.
5942 //
5943 // Pop elements from the stack of open elements until a select element has been
5944 // popped from the stack.
5945 //
5946 // Reset the insertion mode appropriately.
5947 //
5948 // Reprocess the token.
5949 Token::StartTag { tag_name, .. }
5950 if matches!(
5951 &**tag_name,
5952 "caption" | "table" | "tbody" | "tfoot" | "thead" | "tr" | "td" | "th"
5953 ) =>
5954 {
5955 self.errors.push(Error::new(
5956 token_and_info.span,
5957 ErrorKind::StartTagWithSelectOpen(tag_name.clone()),
5958 ));
5959 self.open_elements_stack
5960 .pop_until_tag_name_popped(&["select"]);
5961 self.reset_insertion_mode();
5962 self.process_token(token_and_info, None)?;
5963 }
5964 // An end tag whose tag name is one of: "caption", "table", "tbody", "tfoot",
5965 // "thead", "tr", "td", "th"
5966 //
5967 // Parse error.
5968 //
5969 // If the stack of open elements does not have an element in table scope that is
5970 // an HTML element with the same tag name as that of the token, then ignore the
5971 // token.
5972 //
5973 // Otherwise:
5974 //
5975 // Pop elements from the stack of open elements until a select element has been
5976 // popped from the stack.
5977 //
5978 // Reset the insertion mode appropriately.
5979 //
5980 // Reprocess the token.
5981 Token::EndTag { tag_name, .. }
5982 if matches!(
5983 &**tag_name,
5984 "caption" | "table" | "tbody" | "tfoot" | "thead" | "tr" | "td" | "th"
5985 ) =>
5986 {
5987 self.errors.push(Error::new(
5988 token_and_info.span,
5989 ErrorKind::EndTagSeenWithSelectOpen(tag_name.clone()),
5990 ));
5991
5992 if !self.open_elements_stack.has_in_table_scope(tag_name) {
5993 // Ignore
5994 return Ok(());
5995 }
5996
5997 self.open_elements_stack
5998 .pop_until_tag_name_popped(&["select"]);
5999 self.reset_insertion_mode();
6000 self.process_token(token_and_info, None)?;
6001 }
6002 // Anything else
6003 //
6004 // Process the token using the rules for the "in select" insertion mode.
6005 _ => {
6006 self.process_token_using_rules(token_and_info, InsertionMode::InSelect)?;
6007 }
6008 }
6009 }
6010 // The "in template" insertion mode
6011 InsertionMode::InTemplate => {
6012 // When the user agent is to apply the rules for the "in template" insertion
6013 // mode, the user agent must handle the token as follows:
6014 match token {
6015 // A character token
6016 // A comment token
6017 // A DOCTYPE token
6018 //
6019 // Process the token using the rules for the "in body" insertion mode.
6020 Token::Character { .. } | Token::Comment { .. } | Token::Doctype { .. } => {
6021 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6022 }
6023 // A start tag whose tag name is one of: "base", "basefont", "bgsound", "link",
6024 // "meta", "noframes", "script", "style", "template", "title"
6025 //
6026 // An end tag whose tag name is "template"
6027 //
6028 // Process the token using the rules for the "in head" insertion mode.
6029 Token::StartTag { tag_name, .. }
6030 if matches!(
6031 &**tag_name,
6032 "base"
6033 | "basefont"
6034 | "bgsound"
6035 | "link"
6036 | "meta"
6037 | "noframes"
6038 | "script"
6039 | "style"
6040 | "template"
6041 | "title"
6042 ) =>
6043 {
6044 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
6045 }
6046 Token::EndTag { tag_name, .. } if tag_name == "template" => {
6047 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
6048 }
6049 // A start tag whose tag name is one of: "caption", "colgroup", "tbody",
6050 // "tfoot", "thead"
6051 //
6052 // Pop the current template insertion mode off the stack of template insertion
6053 // modes.
6054 //
6055 // Push "in table" onto the stack of template insertion modes so that it is the
6056 // new current template insertion mode.
6057 //
6058 // Switch the insertion mode to "in table", and reprocess the token.
6059 Token::StartTag { tag_name, .. }
6060 if matches!(
6061 &**tag_name,
6062 "caption" | "colgroup" | "tbody" | "tfoot" | "thead"
6063 ) =>
6064 {
6065 self.template_insertion_mode_stack.pop();
6066 self.template_insertion_mode_stack
6067 .push(InsertionMode::InTable);
6068 self.insertion_mode = InsertionMode::InTable;
6069 self.process_token(token_and_info, None)?;
6070 }
6071 // A start tag whose tag name is "col"
6072 //
6073 // Pop the current template insertion mode off the stack of template insertion
6074 // modes.
6075 //
6076 // Push "in column group" onto the stack of template insertion modes so that it
6077 // is the new current template insertion mode.
6078 //
6079 // Switch the insertion mode to "in column group", and reprocess the token.
6080 Token::StartTag { tag_name, .. } if tag_name == "col" => {
6081 self.template_insertion_mode_stack.pop();
6082 self.template_insertion_mode_stack
6083 .push(InsertionMode::InColumnGroup);
6084 self.insertion_mode = InsertionMode::InColumnGroup;
6085 self.process_token(token_and_info, None)?;
6086 }
6087 // A start tag whose tag name is "tr"
6088 //
6089 // Pop the current template insertion mode off the stack of template insertion
6090 // modes.
6091 //
6092 // Push "in table body" onto the stack of template insertion modes so that it is
6093 // the new current template insertion mode.
6094 //
6095 // Switch the insertion mode to "in table body", and reprocess the token.
6096 Token::StartTag { tag_name, .. } if tag_name == "tr" => {
6097 self.template_insertion_mode_stack.pop();
6098 self.template_insertion_mode_stack
6099 .push(InsertionMode::InTableBody);
6100 self.insertion_mode = InsertionMode::InTableBody;
6101 self.process_token(token_and_info, None)?;
6102 }
6103 // A start tag whose tag name is one of: "td", "th"
6104 //
6105 // Pop the current template insertion mode off the stack of template insertion
6106 // modes.
6107 //
6108 // Push "in row" onto the stack of template insertion modes so that it is the
6109 // new current template insertion mode.
6110 //
6111 // Switch the insertion mode to "in row", and reprocess the token.
6112 Token::StartTag { tag_name, .. } if matches!(&**tag_name, "td" | "th") => {
6113 self.template_insertion_mode_stack.pop();
6114 self.template_insertion_mode_stack
6115 .push(InsertionMode::InRow);
6116 self.insertion_mode = InsertionMode::InRow;
6117 self.process_token(token_and_info, None)?;
6118 }
6119 // Any other start tag
6120 //
6121 // Pop the current template insertion mode off the stack of template insertion
6122 // modes.
6123 //
6124 // Push "in body" onto the stack of template insertion modes so that it is the
6125 // new current template insertion mode.
6126 //
6127 // Switch the insertion mode to "in body", and reprocess the token.
6128 Token::StartTag { .. } => {
6129 self.template_insertion_mode_stack.pop();
6130 self.template_insertion_mode_stack
6131 .push(InsertionMode::InBody);
6132 self.insertion_mode = InsertionMode::InBody;
6133 self.process_token(token_and_info, None)?;
6134 }
6135 // Any other end tag
6136 //
6137 // Parse error. Ignore the token.
6138 Token::EndTag { tag_name, .. } => {
6139 self.errors.push(Error::new(
6140 token_and_info.span,
6141 ErrorKind::StrayEndTag(tag_name.clone()),
6142 ));
6143 }
6144 // An end-of-file token
6145 //
6146 // If there is no template element on the stack of open elements, then stop
6147 // parsing. (fragment case)
6148 //
6149 // Otherwise, this is a parse error.
6150 //
6151 // Pop elements from the stack of open elements until a template element has
6152 // been popped from the stack.
6153 //
6154 // Clear the list of active formatting elements up to the last marker.
6155 //
6156 // Pop the current template insertion mode off the stack of template insertion
6157 // modes.
6158 //
6159 // Reset the insertion mode appropriately.
6160 //
6161 // Reprocess the token.
6162 Token::Eof => {
6163 if !self.open_elements_stack.contains_template_element() {
6164 self.stopped = true;
6165 } else {
6166 self.update_end_tag_span(
6167 self.open_elements_stack.items.last(),
6168 token_and_info.span,
6169 );
6170 self.errors.push(Error::new(
6171 token_and_info.span,
6172 ErrorKind::EofWithUnclosedElements,
6173 ));
6174 self.open_elements_stack
6175 .pop_until_tag_name_popped(&["template"]);
6176 self.active_formatting_elements.clear_to_last_marker();
6177 self.template_insertion_mode_stack.pop();
6178 self.reset_insertion_mode();
6179 self.process_token(token_and_info, None)?;
6180 }
6181 }
6182 }
6183 }
6184 // The "after body" insertion mode
6185 InsertionMode::AfterBody => {
6186 // When the user agent is to apply the rules for the "after body" insertion
6187 // mode, the user agent must handle the token as follows:
6188 match token {
6189 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
6190 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
6191 // SPACE
6192 //
6193 // Process the token using the rules for the "in body" insertion mode.
6194 Token::Character {
6195 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
6196 ..
6197 } => {
6198 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6199 }
6200 // A comment token
6201 //
6202 // Insert a comment as the last child of the first element in the stack of open
6203 // elements (the html element).
6204 Token::Comment { .. } => {
6205 self.insert_comment_as_last_child_of_first_element(token_and_info)?;
6206 }
6207 // A DOCTYPE token
6208 //
6209 // Parse error. Ignore the token.
6210 Token::Doctype { .. } => {
6211 self.errors
6212 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
6213 }
6214 // A start tag whose tag name is "html"
6215 //
6216 // Process the token using the rules for the "in body" insertion mode.
6217 Token::StartTag { tag_name, .. } if tag_name == "html" => {
6218 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6219 }
6220 // An end tag whose tag name is "html"
6221 //
6222 // If the parser was created as part of the HTML fragment parsing algorithm,
6223 // this is a parse error; ignore the token. (fragment case)
6224 //
6225 // Otherwise, switch the insertion mode to "after after body".
6226 Token::EndTag { tag_name, .. } if tag_name == "html" => {
6227 if self.is_fragment_case {
6228 self.errors.push(Error::new(
6229 token_and_info.span,
6230 ErrorKind::StrayEndTag(tag_name.clone()),
6231 ));
6232 } else {
6233 self.update_end_tag_span(
6234 self.open_elements_stack.items.first(),
6235 token_and_info.span,
6236 );
6237 self.insertion_mode = InsertionMode::AfterAfterBody;
6238 }
6239 }
6240 // An end-of-file token
6241 //
6242 // Stop parsing.
6243 Token::Eof => {
6244 self.update_end_tag_span(
6245 self.open_elements_stack.items.last(),
6246 token_and_info.span,
6247 );
6248 self.stopped = true;
6249 }
6250 // Anything else
6251 //
6252 // Parse error. Switch the insertion mode to "in body" and reprocess the token.
6253 _ => {
6254 match token {
6255 // Doctype handled above
6256 // Comment handled above
6257 // EOF handled above
6258 Token::Character { .. } => {
6259 self.errors.push(Error::new(
6260 token_and_info.span,
6261 ErrorKind::NonSpaceCharacterAfterBody,
6262 ));
6263 }
6264 Token::StartTag { tag_name, .. } => {
6265 self.errors.push(Error::new(
6266 token_and_info.span,
6267 ErrorKind::StrayStartTag(tag_name.clone()),
6268 ));
6269 }
6270 Token::EndTag { .. } => {
6271 self.errors.push(Error::new(
6272 token_and_info.span,
6273 ErrorKind::EndTagAfterBody,
6274 ));
6275 }
6276 _ => {
6277 unreachable!();
6278 }
6279 }
6280
6281 self.insertion_mode = InsertionMode::InBody;
6282 self.process_token(token_and_info, None)?;
6283 }
6284 }
6285 }
6286 // The "in frameset" insertion mode
6287 InsertionMode::InFrameset => {
6288 // When the user agent is to apply the rules for the "in frameset" insertion
6289 // mode, the user agent must handle the token as follows:
6290 match token {
6291 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
6292 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
6293 // SPACE
6294 //
6295 // Insert the character.
6296 Token::Character {
6297 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
6298 ..
6299 } => {
6300 self.insert_character(token_and_info)?;
6301 }
6302 // A comment token
6303 //
6304 // Insert a comment.
6305 Token::Comment { .. } => {
6306 self.insert_comment(token_and_info)?;
6307 }
6308 // A DOCTYPE token
6309 //
6310 // Parse error. Ignore the token.
6311 Token::Doctype { .. } => {
6312 self.errors
6313 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
6314 }
6315 // A start tag whose tag name is "html"
6316 //
6317 // Process the token using the rules for the "in body" insertion mode.
6318 Token::StartTag { tag_name, .. } if tag_name == "html" => {
6319 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6320 }
6321 // A start tag whose tag name is "frameset"
6322 //
6323 // Insert an HTML element for the token.
6324 Token::StartTag {
6325 tag_name,
6326 is_self_closing,
6327 ..
6328 } if tag_name == "frameset" => {
6329 self.insert_html_element(token_and_info)?;
6330 maybe_allow_self_closing!(is_self_closing, tag_name);
6331 }
6332 // An end tag whose tag name is "frameset"
6333 //
6334 // If the current node is the root html element, then this is a parse error;
6335 // ignore the token. (fragment case)
6336 //
6337 // Otherwise, pop the current node from the stack of open elements.
6338 //
6339 // If the parser was not created as part of the HTML fragment parsing algorithm
6340 // (fragment case), and the current node is no longer a frameset element, then
6341 // switch the insertion mode to "after frameset".
6342 Token::EndTag { tag_name, .. } if tag_name == "frameset" => {
6343 let is_root_html_document = match self.open_elements_stack.items.last() {
6344 Some(node)
6345 if is_html_element!(node, "html")
6346 && self.open_elements_stack.items.len() == 1 =>
6347 {
6348 true
6349 }
6350 _ => false,
6351 };
6352
6353 if is_root_html_document {
6354 self.errors.push(Error::new(
6355 token_and_info.span,
6356 ErrorKind::StrayEndTag(tag_name.clone()),
6357 ));
6358 } else {
6359 let popped = self.open_elements_stack.pop();
6360
6361 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
6362
6363 if !self.is_fragment_case {
6364 match self.open_elements_stack.items.last() {
6365 Some(node) if !is_html_element!(node, "frameset") => {
6366 self.insertion_mode = InsertionMode::AfterFrameset;
6367 }
6368 _ => {}
6369 }
6370 }
6371 }
6372 }
6373 // A start tag whose tag name is "frame"
6374 //
6375 // Insert an HTML element for the token. Immediately pop the current node off
6376 // the stack of open elements.
6377 //
6378 // Acknowledge the token's self-closing flag, if it is set.
6379 Token::StartTag {
6380 tag_name,
6381 is_self_closing,
6382 ..
6383 } if tag_name == "frame" => {
6384 let is_self_closing = *is_self_closing;
6385
6386 self.insert_html_element(token_and_info)?;
6387 self.open_elements_stack.pop();
6388
6389 if is_self_closing {
6390 token_and_info.acknowledged = true;
6391 }
6392 }
6393 // A start tag whose tag name is "noframes"
6394 //
6395 // Process the token using the rules for the "in head" insertion mode.
6396 Token::StartTag { tag_name, .. } if tag_name == "noframes" => {
6397 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
6398 }
6399 // An end-of-file token
6400 //
6401 // If the current node is not the root html element, then this is a parse error.
6402 //
6403 // Note: The current node can only be the root html element in the fragment
6404 // case.
6405 //
6406 // Stop parsing.
6407 Token::Eof => {
6408 self.update_end_tag_span(
6409 self.open_elements_stack.items.last(),
6410 token_and_info.span,
6411 );
6412
6413 match self.open_elements_stack.items.last() {
6414 Some(node) if !is_html_element!(node, "html") => {
6415 self.errors.push(Error::new(
6416 token_and_info.span,
6417 ErrorKind::EofWithUnclosedElements,
6418 ));
6419 }
6420 _ => {}
6421 }
6422
6423 self.stopped = true;
6424 }
6425 // Anything else
6426 //
6427 // Parse error. Ignore the token.
6428 _ => match token {
6429 // Doctype handled above
6430 // Comment handled above
6431 // EOF handled above
6432 Token::Character { .. } => {
6433 self.errors.push(Error::new(
6434 token_and_info.span,
6435 ErrorKind::NonSpaceCharacterInFrameset,
6436 ));
6437 }
6438 Token::StartTag { tag_name, .. } => {
6439 self.errors.push(Error::new(
6440 token_and_info.span,
6441 ErrorKind::StrayStartTag(tag_name.clone()),
6442 ));
6443 }
6444 Token::EndTag { tag_name, .. } => {
6445 self.errors.push(Error::new(
6446 token_and_info.span,
6447 ErrorKind::StrayEndTag(tag_name.clone()),
6448 ));
6449 }
6450 _ => {
6451 unreachable!()
6452 }
6453 },
6454 }
6455 }
6456 // The "after frameset" insertion mode
6457 InsertionMode::AfterFrameset => {
6458 // When the user agent is to apply the rules for the "after frameset" insertion
6459 // mode, the user agent must handle the token as follows:
6460 match token {
6461 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
6462 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
6463 // SPACE
6464 //
6465 // Insert the character.
6466 Token::Character {
6467 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
6468 ..
6469 } => {
6470 self.insert_character(token_and_info)?;
6471 }
6472 // A comment token
6473 //
6474 // Insert a comment.
6475 Token::Comment { .. } => {
6476 self.insert_comment(token_and_info)?;
6477 }
6478 // A DOCTYPE token
6479 //
6480 // Parse error. Ignore the token.
6481 Token::Doctype { .. } => {
6482 self.errors
6483 .push(Error::new(token_and_info.span, ErrorKind::StrayDoctype));
6484 }
6485 // A start tag whose tag name is "html"
6486 //
6487 // Process the token using the rules for the "in body" insertion mode.
6488 Token::StartTag { tag_name, .. } if tag_name == "html" => {
6489 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6490 }
6491 // An end tag whose tag name is "html"
6492 //
6493 // Switch the insertion mode to "after after frameset".
6494 Token::EndTag { tag_name, .. } if tag_name == "html" => {
6495 self.update_end_tag_span(
6496 self.open_elements_stack.items.last(),
6497 token_and_info.span,
6498 );
6499 self.insertion_mode = InsertionMode::AfterAfterFrameset;
6500 }
6501 // A start tag whose tag name is "noframes"
6502 //
6503 // Process the token using the rules for the "in head" insertion mode.
6504 Token::StartTag { tag_name, .. } if tag_name == "noframes" => {
6505 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
6506 }
6507 // An end-of-file token
6508 //
6509 // Stop parsing.
6510 Token::Eof => {
6511 self.stopped = true;
6512 }
6513 // Anything else
6514 //
6515 // Parse error. Ignore the token.
6516 _ => match token {
6517 // Doctype handled above
6518 // Comment handled above
6519 // EOF handled above
6520 Token::Character { .. } => {
6521 self.errors.push(Error::new(
6522 token_and_info.span,
6523 ErrorKind::NonSpaceCharacterAfterFrameset,
6524 ));
6525 }
6526 Token::StartTag { tag_name, .. } => {
6527 self.errors.push(Error::new(
6528 token_and_info.span,
6529 ErrorKind::StrayStartTag(tag_name.clone()),
6530 ));
6531 }
6532 Token::EndTag { tag_name, .. } => {
6533 self.errors.push(Error::new(
6534 token_and_info.span,
6535 ErrorKind::StrayEndTag(tag_name.clone()),
6536 ));
6537 }
6538 _ => {
6539 unreachable!()
6540 }
6541 },
6542 }
6543 }
6544 // The "after after body" insertion mode
6545 InsertionMode::AfterAfterBody => {
6546 // When the user agent is to apply the rules for the "after after body"
6547 // insertion mode, the user agent must handle the token as follows:
6548 match token {
6549 // A comment token
6550 //
6551 // Insert a comment as the last child of the Document object.
6552 Token::Comment { .. } => {
6553 self.insert_comment_as_last_child_of_document(token_and_info)?;
6554 }
6555 // A DOCTYPE token
6556 //
6557 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
6558 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
6559 // SPACE
6560 //
6561 // A start tag whose tag name is "html"
6562 //
6563 // Process the token using the rules for the "in body" insertion mode.
6564 Token::Doctype { .. } => {
6565 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6566 }
6567 Token::Character {
6568 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
6569 ..
6570 } => {
6571 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6572 }
6573 Token::StartTag { tag_name, .. } if tag_name == "html" => {
6574 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6575 }
6576 // An end-of-file token
6577 //
6578 // Stop parsing.
6579 Token::Eof => {
6580 self.stopped = true;
6581 }
6582 // Anything else
6583 //
6584 // Parse error. Switch the insertion mode to "in body" and reprocess the token.
6585 _ => {
6586 match token {
6587 // Doctype handled above
6588 // Comment handled above
6589 // EOF handled above
6590 Token::Character { .. } => {
6591 self.errors.push(Error::new(
6592 token_and_info.span,
6593 ErrorKind::NonSpaceCharacterInTrailer,
6594 ));
6595 }
6596 Token::StartTag { tag_name, .. } => {
6597 self.errors.push(Error::new(
6598 token_and_info.span,
6599 ErrorKind::StrayStartTag(tag_name.clone()),
6600 ));
6601 }
6602 Token::EndTag { tag_name, .. } => {
6603 self.errors.push(Error::new(
6604 token_and_info.span,
6605 ErrorKind::StrayEndTag(tag_name.clone()),
6606 ));
6607 }
6608 _ => {
6609 unreachable!();
6610 }
6611 }
6612
6613 self.insertion_mode = InsertionMode::InBody;
6614 self.process_token(token_and_info, None)?;
6615 }
6616 }
6617 }
6618 // The "after after frameset" insertion mode
6619 InsertionMode::AfterAfterFrameset => {
6620 // When the user agent is to apply the rules for the "after after frameset"
6621 // insertion mode, the user agent must handle the token as follows:
6622 match token {
6623 // A comment token
6624 //
6625 // Insert a comment as the last child of the Document object.
6626 Token::Comment { .. } => {
6627 self.insert_comment_as_last_child_of_document(token_and_info)?;
6628 }
6629 // A DOCTYPE token
6630 //
6631 // A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE
6632 // FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020
6633 // SPACE
6634 //
6635 // A start tag whose tag name is "html"
6636 //
6637 // Process the token using the rules for the "in body" insertion mode.
6638 Token::Doctype { .. } => {
6639 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6640 }
6641 Token::Character {
6642 value: '\x09' | '\x0A' | '\x0C' | '\x0D' | '\x20',
6643 ..
6644 } => {
6645 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6646 }
6647 Token::StartTag { tag_name, .. } if tag_name == "html" => {
6648 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6649 }
6650 // An end-of-file token
6651 //
6652 // Stop parsing.
6653 Token::Eof => {
6654 self.update_end_tag_span(
6655 self.open_elements_stack.items.last(),
6656 token_and_info.span,
6657 );
6658 self.stopped = true;
6659 }
6660 // A start tag whose tag name is "noframes"
6661 //
6662 // Process the token using the rules for the "in head" insertion mode.
6663 Token::StartTag { tag_name, .. } if tag_name == "noframes" => {
6664 self.process_token_using_rules(token_and_info, InsertionMode::InHead)?;
6665 }
6666 // Anything else
6667 //
6668 // Parse error. Ignore the token.
6669 // Anything else
6670 //
6671 // Parse error. Ignore the token.
6672 _ => match token {
6673 // Doctype handled above
6674 // Comment handled above
6675 // EOF handled above
6676 Token::Character { .. } => {
6677 self.errors.push(Error::new(
6678 token_and_info.span,
6679 ErrorKind::NonSpaceCharacterInTrailer,
6680 ));
6681 }
6682 Token::StartTag { tag_name, .. } => {
6683 self.errors.push(Error::new(
6684 token_and_info.span,
6685 ErrorKind::StrayStartTag(tag_name.clone()),
6686 ));
6687 }
6688 Token::EndTag { tag_name, .. } => {
6689 self.errors.push(Error::new(
6690 token_and_info.span,
6691 ErrorKind::StrayEndTag(tag_name.clone()),
6692 ));
6693 }
6694 _ => {
6695 unreachable!();
6696 }
6697 },
6698 }
6699 }
6700 }
6701
6702 Ok(())
6703 }
6704
6705 fn process_token_in_table_insertion_mode_anything_else(
6706 &mut self,
6707 token_and_info: &mut TokenAndInfo,
6708 ) -> PResult<()> {
6709 match &token_and_info.token {
6710 Token::StartTag { tag_name, .. } => {
6711 self.errors.push(Error::new(
6712 token_and_info.span,
6713 ErrorKind::StartTagInTable(tag_name.clone()),
6714 ));
6715 }
6716 Token::EndTag { tag_name, .. } => {
6717 self.errors.push(Error::new(
6718 token_and_info.span,
6719 ErrorKind::StrayEndTag(tag_name.clone()),
6720 ));
6721 }
6722 Token::Character { .. } => {
6723 self.errors.push(Error::new(
6724 token_and_info.span,
6725 ErrorKind::NonSpaceCharacterInTable,
6726 ));
6727 }
6728 _ => {
6729 unreachable!();
6730 }
6731 }
6732
6733 let saved_foster_parenting_state = self.foster_parenting_enabled;
6734
6735 self.foster_parenting_enabled = true;
6736 self.process_token_using_rules(token_and_info, InsertionMode::InBody)?;
6737 self.foster_parenting_enabled = saved_foster_parenting_state;
6738
6739 Ok(())
6740 }
6741
6742 // Any other end tag
6743 //
6744 // Run these steps:
6745 //
6746 // 1. Initialize node to be the current node (the bottommost node of the stack).
6747 //
6748 // 2. Loop: If node is an HTML element with the same tag name as the token,
6749 // then:
6750 //
6751 // 1. Generate implied end tags, except for HTML elements with the same tag
6752 // name as the token.
6753 //
6754 // 2. If node is not the current node, then this is a parse error.
6755 //
6756 // 3. Pop all the nodes from the current node up to node, including node, then
6757 // stop these steps.
6758 //
6759 // 3. Otherwise, if node is in the special category, then this is a parse error;
6760 // ignore the token, and return.
6761 //
6762 // 4. Set node to the previous entry in the stack of open elements.
6763 //
6764 // 5. Return to the step labeled loop.
6765 fn any_other_end_tag_for_in_body_insertion_mode(&mut self, token_and_info: &TokenAndInfo) {
6766 let mut match_idx = None;
6767 let tag_name = match &token_and_info.token {
6768 Token::StartTag { tag_name, .. } | Token::EndTag { tag_name, .. } => tag_name,
6769 _ => {
6770 unreachable!();
6771 }
6772 };
6773
6774 // 1., 2., 4. and 5.
6775 for (i, node) in self.open_elements_stack.items.iter().enumerate().rev() {
6776 if is_html_element_with_tag_name!(node, tag_name) {
6777 match_idx = Some(i);
6778
6779 break;
6780 }
6781
6782 // 3.
6783 if self.is_special_element(node) {
6784 self.errors.push(Error::new(
6785 token_and_info.span,
6786 ErrorKind::StrayEndTag(tag_name.clone()),
6787 ));
6788
6789 return;
6790 }
6791 }
6792
6793 let match_idx = match match_idx {
6794 None => {
6795 self.errors.push(Error::new(
6796 token_and_info.span,
6797 ErrorKind::StrayEndTag(tag_name.clone()),
6798 ));
6799
6800 return;
6801 }
6802 Some(x) => x,
6803 };
6804
6805 // 2. - 1.
6806 self.open_elements_stack
6807 .generate_implied_end_tags_with_exclusion(tag_name);
6808
6809 // 2. - 2.
6810 if match_idx != self.open_elements_stack.items.len() - 1 {
6811 self.errors.push(Error::new(
6812 token_and_info.span,
6813 ErrorKind::UnclosedElements(tag_name.clone()),
6814 ));
6815 } else {
6816 let node = self.open_elements_stack.items.last();
6817
6818 self.update_end_tag_span(node, token_and_info.span);
6819 }
6820
6821 // 2.- 3.
6822 self.open_elements_stack.items.truncate(match_idx);
6823 }
6824
6825 fn process_token_using_rules(
6826 &mut self,
6827 token_and_info: &mut TokenAndInfo,
6828 insertion_mode: InsertionMode,
6829 ) -> PResult<()> {
6830 self.process_token(token_and_info, Some(insertion_mode))?;
6831
6832 Ok(())
6833 }
6834
6835 // When the steps below require the user agent to adjust MathML attributes for a
6836 // token, then, if the token has an attribute named definitionurl, change its
6837 // name to definitionURL (note the case difference).
6838 fn adjust_math_ml_attribute(&self, attribute: &mut Attribute) {
6839 if attribute.name == "definitionurl" {
6840 attribute.name = atom!("definitionURL");
6841 }
6842 }
6843
6844 // When the steps below require the user agent to adjust SVG attributes for a
6845 // token, then, for each attribute on the token whose attribute name is one of
6846 // the ones in the first column of the following table, change the attribute's
6847 // name to the name given in the corresponding cell in the second column. (This
6848 // fixes the case of SVG attributes that are not all lowercase.)
6849 //
6850 // Attribute name on token Attribute name on element
6851 // attributename attributeName
6852 // attributetype attributeType
6853 // basefrequency baseFrequency
6854 // baseprofile baseProfile
6855 // calcmode calcMode
6856 // clippathunits clipPathUnits
6857 // diffuseconstant diffuseConstant
6858 // edgemode edgeMode
6859 // filterunits filterUnits
6860 // glyphref glyphRef
6861 // gradienttransform gradientTransform
6862 // gradientunits gradientUnits
6863 // kernelmatrix kernelMatrix
6864 // kernelunitlength kernelUnitLength
6865 // keypoints keyPoints
6866 // keysplines keySplines
6867 // keytimes keyTimes
6868 // lengthadjust lengthAdjust
6869 // limitingconeangle limitingConeAngle
6870 // markerheight markerHeight
6871 // markerunits markerUnits
6872 // markerwidth markerWidth
6873 // maskcontentunits maskContentUnits
6874 // maskunits maskUnits
6875 // numoctaves numOctaves
6876 // pathlength pathLength
6877 // patterncontentunits patternContentUnits
6878 // patterntransform patternTransform
6879 // patternunits patternUnits
6880 // pointsatx pointsAtX
6881 // pointsaty pointsAtY
6882 // pointsatz pointsAtZ
6883 // preservealpha preserveAlpha
6884 // preserveaspectratio preserveAspectRatio
6885 // primitiveunits primitiveUnits
6886 // refx refX
6887 // refy refY
6888 // repeatcount repeatCount
6889 // repeatdur repeatDur
6890 // requiredextensions requiredExtensions
6891 // requiredfeatures requiredFeatures
6892 // specularconstant specularConstant
6893 // specularexponent specularExponent
6894 // spreadmethod spreadMethod
6895 // startoffset startOffset
6896 // stddeviation stdDeviation
6897 // stitchtiles stitchTiles
6898 // surfacescale surfaceScale
6899 // systemlanguage systemLanguage
6900 // tablevalues tableValues
6901 // targetx targetX
6902 // targety targetY
6903 // textlength textLength
6904 // viewbox viewBox
6905 // viewtarget viewTarget
6906 // xchannelselector xChannelSelector
6907 // ychannelselector yChannelSelector
6908 // zoomandpan zoomAndPan
6909 fn adjust_svg_attribute(&self, attribute: &mut Attribute) {
6910 match &*attribute.name {
6911 "attributename" => attribute.name = atom!("attributeName"),
6912 "attributetype" => attribute.name = atom!("attributeType"),
6913 "basefrequency" => attribute.name = atom!("baseFrequency"),
6914 "baseprofile" => attribute.name = atom!("baseProfile"),
6915 "calcmode" => attribute.name = atom!("calcMode"),
6916 "clippathunits" => attribute.name = atom!("clipPathUnits"),
6917 "diffuseconstant" => attribute.name = atom!("diffuseConstant"),
6918 "edgemode" => attribute.name = atom!("edgeMode"),
6919 "filterunits" => attribute.name = atom!("filterUnits"),
6920 "glyphref" => attribute.name = atom!("glyphRef"),
6921 "gradienttransform" => attribute.name = atom!("gradientTransform"),
6922 "gradientunits" => attribute.name = atom!("gradientUnits"),
6923 "kernelmatrix" => attribute.name = atom!("kernelMatrix"),
6924 "kernelunitlength" => attribute.name = atom!("kernelUnitLength"),
6925 "keypoints" => attribute.name = atom!("keyPoints"),
6926 "keysplines" => attribute.name = atom!("keySplines"),
6927 "keytimes" => attribute.name = atom!("keyTimes"),
6928 "lengthadjust" => attribute.name = atom!("lengthAdjust"),
6929 "limitingconeangle" => attribute.name = atom!("limitingConeAngle"),
6930 "markerheight" => attribute.name = atom!("markerHeight"),
6931 "markerunits" => attribute.name = atom!("markerUnits"),
6932 "markerwidth" => attribute.name = atom!("markerWidth"),
6933 "maskcontentunits" => attribute.name = atom!("maskContentUnits"),
6934 "maskunits" => attribute.name = atom!("maskUnits"),
6935 "numoctaves" => attribute.name = atom!("numOctaves"),
6936 "pathlength" => attribute.name = atom!("pathLength"),
6937 "patterncontentunits" => attribute.name = atom!("patternContentUnits"),
6938 "patterntransform" => attribute.name = atom!("patternTransform"),
6939 "patternunits" => attribute.name = atom!("patternUnits"),
6940 "pointsatx" => attribute.name = atom!("pointsAtX"),
6941 "pointsaty" => attribute.name = atom!("pointsAtY"),
6942 "pointsatz" => attribute.name = atom!("pointsAtZ"),
6943 "preservealpha" => attribute.name = atom!("preserveAlpha"),
6944 "preserveaspectratio" => attribute.name = atom!("preserveAspectRatio"),
6945 "primitiveunits" => attribute.name = atom!("primitiveUnits"),
6946 "refx" => attribute.name = atom!("refX"),
6947 "refy" => attribute.name = atom!("refY"),
6948 "repeatcount" => attribute.name = atom!("repeatCount"),
6949 "repeatdur" => attribute.name = atom!("repeatDur"),
6950 "requiredextensions" => attribute.name = atom!("requiredExtensions"),
6951 "requiredfeatures" => attribute.name = atom!("requiredFeatures"),
6952 "specularconstant" => attribute.name = atom!("specularConstant"),
6953 "specularexponent" => attribute.name = atom!("specularExponent"),
6954 "spreadmethod" => attribute.name = atom!("spreadMethod"),
6955 "startoffset" => attribute.name = atom!("startOffset"),
6956 "stddeviation" => attribute.name = atom!("stdDeviation"),
6957 "stitchtiles" => attribute.name = atom!("stitchTiles"),
6958 "surfacescale" => attribute.name = atom!("surfaceScale"),
6959 "systemlanguage" => attribute.name = atom!("systemLanguage"),
6960 "tablevalues" => attribute.name = atom!("tableValues"),
6961 "targetx" => attribute.name = atom!("targetX"),
6962 "targety" => attribute.name = atom!("targetY"),
6963 "textlength" => attribute.name = atom!("textLength"),
6964 "viewbox" => attribute.name = atom!("viewBox"),
6965 "viewtarget" => attribute.name = atom!("viewTarget"),
6966 "xchannelselector" => attribute.name = atom!("xChannelSelector"),
6967 "ychannelselector" => attribute.name = atom!("yChannelSelector"),
6968 "zoomandpan" => attribute.name = atom!("zoomAndPan"),
6969 _ => {}
6970 }
6971 }
6972
6973 // When the steps below require the user agent to adjust foreign attributes for
6974 // a token, then, if any of the attributes on the token match the strings given
6975 // in the first column of the following table, let the attribute be a namespaced
6976 // attribute, with the prefix being the string given in the corresponding cell
6977 // in the second column, the local name being the string given in the
6978 // corresponding cell in the third column, and the namespace being the namespace
6979 // given in the corresponding cell in the fourth column. (This fixes the use of
6980 // namespaced attributes, in particular lang attributes in the XML namespace.)
6981 //
6982 //
6983 // Attribute name Prefix Local name Namespace
6984 //
6985 // xlink:actuate xlink actuate XLink namespace
6986 // xlink:arcrole xlink arcrole XLink namespace
6987 // xlink:href xlink href XLink namespace
6988 // xlink:role xlink role XLink namespace
6989 // xlink:show xlink show XLink namespace
6990 // xlink:title xlink title XLink namespace
6991 // xlink:type xlink type XLink namespace
6992 // xml:lang xml lang XML namespace
6993 // xml:space xml space XML namespace
6994 // xmlns (none) xmlns XMLNS namespace
6995 // xmlns:xlink xmlns xlink XMLNS namespace
6996 fn adjust_foreign_attribute(&self, attribute: &mut Attribute) {
6997 match &*attribute.name {
6998 "xlink:actuate" => {
6999 attribute.namespace = Some(Namespace::XLINK);
7000 attribute.prefix = Some(atom!("xlink"));
7001 attribute.name = atom!("actuate");
7002 }
7003 "xlink:arcrole" => {
7004 attribute.namespace = Some(Namespace::XLINK);
7005 attribute.prefix = Some(atom!("xlink"));
7006 attribute.name = atom!("arcrole");
7007 }
7008 "xlink:href" => {
7009 attribute.namespace = Some(Namespace::XLINK);
7010 attribute.prefix = Some(atom!("xlink"));
7011 attribute.name = atom!("href");
7012 }
7013 "xlink:role" => {
7014 attribute.namespace = Some(Namespace::XLINK);
7015 attribute.prefix = Some(atom!("xlink"));
7016 attribute.name = atom!("role");
7017 }
7018 "xlink:show" => {
7019 attribute.namespace = Some(Namespace::XLINK);
7020 attribute.prefix = Some(atom!("xlink"));
7021 attribute.name = atom!("show");
7022 }
7023 "xlink:title" => {
7024 attribute.namespace = Some(Namespace::XLINK);
7025 attribute.prefix = Some(atom!("xlink"));
7026 attribute.name = atom!("title");
7027 }
7028 "xlink:type" => {
7029 attribute.namespace = Some(Namespace::XLINK);
7030 attribute.prefix = Some(atom!("xlink"));
7031 attribute.name = atom!("type");
7032 }
7033 "xml:lang" => {
7034 attribute.namespace = Some(Namespace::XML);
7035 attribute.prefix = Some(atom!("xml"));
7036 attribute.name = atom!("lang");
7037 }
7038 "xml:space" => {
7039 attribute.namespace = Some(Namespace::XML);
7040 attribute.prefix = Some(atom!("xml"));
7041 attribute.name = atom!("space");
7042 }
7043 "xmlns" => {
7044 attribute.namespace = Some(Namespace::XMLNS);
7045 attribute.prefix = None;
7046 attribute.name = atom!("xmlns");
7047 }
7048 "xmlns:xlink" => {
7049 attribute.namespace = Some(Namespace::XMLNS);
7050 attribute.prefix = Some(atom!("xmlns"));
7051 attribute.name = atom!("xlink");
7052 }
7053 _ => {}
7054 }
7055 }
7056
7057 fn create_element_for_token(
7058 &self,
7059 token: Token,
7060 span: Span,
7061 namespace: Option<Namespace>,
7062 adjust_attributes: Option<AdjustAttributes>,
7063 ) -> RcNode {
7064 let element = match token {
7065 Token::StartTag {
7066 tag_name,
7067 attributes,
7068 is_self_closing,
7069 ..
7070 }
7071 | Token::EndTag {
7072 tag_name,
7073 attributes,
7074 is_self_closing,
7075 ..
7076 } => {
7077 let attributes = attributes
7078 .into_iter()
7079 .map(|attribute_token| {
7080 let mut attribute = Attribute {
7081 span: attribute_token.span,
7082 namespace: None,
7083 prefix: None,
7084 name: attribute_token.name,
7085 raw_name: attribute_token.raw_name,
7086 value: attribute_token.value,
7087 raw_value: attribute_token.raw_value,
7088 };
7089
7090 match adjust_attributes {
7091 Some(AdjustAttributes::MathML) => {
7092 self.adjust_math_ml_attribute(&mut attribute);
7093 self.adjust_foreign_attribute(&mut attribute);
7094 }
7095 Some(AdjustAttributes::Svg) => {
7096 self.adjust_svg_attribute(&mut attribute);
7097 self.adjust_foreign_attribute(&mut attribute);
7098 }
7099 None => {}
7100 }
7101
7102 attribute
7103 })
7104 .collect();
7105
7106 Data::Element {
7107 tag_name,
7108 namespace: namespace.unwrap(),
7109 attributes: RefCell::new(attributes),
7110 is_self_closing,
7111 }
7112 }
7113 _ => {
7114 unreachable!();
7115 }
7116 };
7117
7118 Node::new(element, span)
7119 }
7120
7121 // The adoption agency algorithm, which takes as its only argument a token token
7122 // for which the algorithm is being run, consists of the following steps:
7123 //
7124 // 1. Let subject be token's tag name.
7125 //
7126 // 2. If the current node is an HTML element whose tag name is subject, and the
7127 // current node is not in the list of active formatting elements, then pop the
7128 // current node off the stack of open elements and return.
7129 //
7130 // 3. Let outer loop counter be 0.
7131 //
7132 // 4. While true:
7133 //
7134 // 1. If outer loop counter is greater than or equal to 8, then return.
7135 //
7136 // 2. Increment outer loop counter by 1.
7137 //
7138 // 3. Let formatting element be the last element in the list of active
7139 // formatting elements that:
7140 //
7141 // is between the end of the list and the last marker in the list, if
7142 // any, or the start of the list otherwise, and has the tag name subject.
7143 //
7144 // If there is no such element, then return and instead act as described
7145 // in the "any other end tag" entry above.
7146 //
7147 // 4. If formatting element is not in the stack of open elements, then this
7148 // is a parse error; remove the element from the list, and return.
7149 //
7150 // 5. If formatting element is in the stack of open elements, but the element
7151 // is not in scope, then this is a parse error; return.
7152 //
7153 // 6. If formatting element is not the current node, this is a parse error.
7154 // (But do not return.)
7155 //
7156 // 7. Let furthest block be the topmost node in the stack of open elements
7157 // that is lower in the stack than formatting element, and is an element
7158 // in the special category. There might not be one.
7159 //
7160 // 8. If there is no furthest block, then the UA must first pop all the nodes
7161 // from the bottom of the stack of open elements, from the current node
7162 // up to and including formatting element, then remove formatting element
7163 // from the list of active formatting elements, and finally return.
7164 //
7165 // 9. Let common ancestor be the element immediately above formatting element
7166 // in the stack of open elements.
7167 //
7168 // 10. Let a bookmark note the position of formatting element in the list of
7169 // active formatting elements relative to the elements on either side of
7170 // it in the list.
7171 //
7172 // 11. Let node and last node be furthest block.
7173 //
7174 // 12. Let inner loop counter be 0.
7175 //
7176 // 13. While true:
7177 //
7178 // 1. Increment inner loop counter by 1.
7179 //
7180 // 2. Let node be the element immediately above node in the stack of open
7181 // elements, or if node is no longer in the stack of open
7182 // elements (e.g. because it got removed by this algorithm), the
7183 // element that was immediately above node in the stack of open
7184 // elements before node was removed.
7185 //
7186 // 3. If node is formatting element, then break.
7187 //
7188 // 4. If inner loop counter is greater than 3 and node is in the list of
7189 // active formatting elements, then remove node from the list of
7190 // active formatting elements.
7191 //
7192 // 5. If node is not in the list of active formatting elements, then
7193 // remove node from the stack of open elements and continue.
7194 //
7195 // 6. Create an element for the token for which the element node was
7196 // created, in the HTML namespace, with common ancestor as the
7197 // intended parent; replace the entry for node in the list of active
7198 // formatting elements with an entry for the new element, replace the
7199 // entry for node in the stack of open elements with an entry for the
7200 // new element, and let node be the new element.
7201 //
7202 // 7. If last node is furthest block, then move the aforementioned
7203 // bookmark to be immediately after the new node in the list of
7204 // active formatting elements.
7205 //
7206 // 8. Append last node to node.
7207 //
7208 // 9. Set last node to node.
7209 //
7210 // 14. Insert whatever last node ended up being in the previous step at the
7211 // appropriate place for inserting a node, but using common ancestor as the
7212 // override target.
7213 //
// 15. Create an element for the token for which formatting element was created,
7215 // in the HTML namespace, with furthest block as the intended parent.
7216 //
7217 // 16. Take all of the child nodes of furthest block and append them to the
7218 // element created in the last step.
7219 //
7220 // 17. Append that new element to furthest block.
7221 //
7222 // 18. Remove formatting element from the list of active formatting elements,
7223 // and insert the new element into the list of active formatting elements at
7224 // the position of the aforementioned bookmark.
7225 //
7226 // 19. Remove formatting element from the stack of open elements, and insert the
7227 // new element into the stack of open elements immediately below the
7228 // position of furthest block in that stack.
7229 //
7230 // This algorithm's name, the "adoption agency algorithm", comes from the way it
7231 // causes elements to change parents, and is in contrast with other possible
7232 // algorithms for dealing with misnested content.
7233 fn run_the_adoption_agency_algorithm(
7234 &mut self,
7235 token_and_info: &TokenAndInfo,
7236 is_closing: bool,
7237 ) -> PResult<()> {
7238 // 1.
7239 let subject = match &token_and_info.token {
7240 Token::StartTag { tag_name, .. } | Token::EndTag { tag_name, .. } => tag_name.clone(),
7241 _ => {
7242 unreachable!();
7243 }
7244 };
7245
7246 // 2.
7247 let last = self.open_elements_stack.items.last();
7248
7249 if let Some(last) = last {
7250 if is_html_element_with_tag_name!(last, &subject)
7251 && self.active_formatting_elements.get_position(last).is_none()
7252 {
7253 let popped = self.open_elements_stack.pop();
7254
7255 if is_closing {
7256 self.update_end_tag_span(popped.as_ref(), token_and_info.span);
7257 }
7258
7259 return Ok(());
7260 }
7261 }
7262
7263 // 3.
7264 let mut counter = 0;
7265
7266 // 4.
7267 loop {
7268 // 1.
7269 if counter >= 8 {
7270 return Ok(());
7271 }
7272
7273 // 2.
7274 counter += 1;
7275
7276 // 3.
7277 let formatting_element = self
7278 .active_formatting_elements
7279 .items
7280 .iter()
7281 .enumerate()
7282 .rev()
7283 .find(|info| match &info.1 {
7284 ActiveFormattingElement::Element(element, _) => {
7285 is_html_element_with_tag_name!(element, &subject)
7286 }
7287 _ => false,
7288 })
7289 .map(|(i, e)| match e {
7290 ActiveFormattingElement::Element(node, token_and_info) => {
7291 (i, node.clone(), token_and_info.clone())
7292 }
7293 _ => {
7294 unreachable!()
7295 }
7296 });
7297
7298 if formatting_element.is_none() {
7299 self.any_other_end_tag_for_in_body_insertion_mode(token_and_info);
7300
7301 return Ok(());
7302 }
7303
7304 let formatting_element = formatting_element.unwrap();
7305
7306 // 4.
7307 let formatting_element_stack_index = self
7308 .open_elements_stack
7309 .items
7310 .iter()
7311 .rposition(|n| is_same_node(n, &formatting_element.1));
7312
7313 if formatting_element_stack_index.is_none() {
7314 self.errors.push(Error::new(
7315 token_and_info.span,
7316 ErrorKind::NoElementToCloseButEndTagSeen(subject),
7317 ));
7318 self.active_formatting_elements
7319 .remove(&formatting_element.1);
7320
7321 return Ok(());
7322 }
7323
7324 // 5.
7325 if formatting_element_stack_index.is_some()
7326 && !self
7327 .open_elements_stack
7328 .has_node_in_scope(&formatting_element.1)
7329 {
7330 self.errors.push(Error::new(
7331 token_and_info.span,
7332 ErrorKind::NoElementToCloseButEndTagSeen(subject),
7333 ));
7334
7335 return Ok(());
7336 }
7337
7338 let formatting_element_stack_index = formatting_element_stack_index.unwrap();
7339
7340 // 6.
7341 if let Some(node) = self.open_elements_stack.items.last() {
7342 if !is_same_node(node, &formatting_element.1) {
7343 self.errors.push(Error::new(
7344 token_and_info.span,
7345 ErrorKind::EndTagViolatesNestingRules(subject.clone()),
7346 ));
7347 }
7348 }
7349
7350 // 7.
7351 let furthest_block = self
7352 .open_elements_stack
7353 .items
7354 .iter()
7355 .enumerate()
7356 .skip(formatting_element_stack_index)
7357 .find(|&(_, open_element)| self.is_special_element(open_element))
7358 .map(|(i, h)| (i, h.clone()));
7359
7360 // 8.
7361 if furthest_block.is_none() {
7362 while let Some(node) = self.open_elements_stack.pop() {
7363 if is_same_node(&node, &formatting_element.1) {
7364 if is_closing {
7365 self.update_end_tag_span(Some(&node), token_and_info.span);
7366 }
7367
7368 break;
7369 }
7370 }
7371
7372 self.active_formatting_elements
7373 .remove(&formatting_element.1);
7374
7375 return Ok(());
7376 }
7377
7378 // 9.
7379 let common_ancestor =
7380 self.open_elements_stack.items[formatting_element_stack_index - 1].clone();
7381
7382 // 10.
7383 let mut bookmark = Bookmark::Replace(formatting_element.1.clone());
7384
7385 // 11.
7386 let furthest_block = furthest_block.unwrap();
7387 let mut node;
7388 let mut node_index = furthest_block.0;
7389 let mut last_node = furthest_block.1.clone();
7390
7391 // 12.
7392 let mut inner_loop_counter = 0;
7393
7394 // 13.
7395 loop {
7396 // 13.1
7397 inner_loop_counter += 1;
7398
7399 // 13.2
7400 node_index -= 1;
7401 node = self.open_elements_stack.items[node_index].clone();
7402
7403 // 13.3
7404 if is_same_node(&node, &formatting_element.1) {
7405 break;
7406 }
7407
7408 // 13.4
7409 let node_formatting_index = self.active_formatting_elements.get_position(&node);
7410 let mut node_in_list = node_formatting_index.is_some();
7411
7412 if inner_loop_counter > 3 && node_in_list {
7413 self.active_formatting_elements.remove(&node);
7414
7415 node_in_list = false;
7416 }
7417
7418 // 13.5
7419 if !node_in_list {
7420 self.open_elements_stack.remove(&node);
7421
7422 continue;
7423 }
7424
7425 // 13.6
7426 let node_formatting_index = node_formatting_index.unwrap();
7427 let token_and_info =
7428 match self.active_formatting_elements.items[node_formatting_index] {
7429 ActiveFormattingElement::Element(ref h, ref t) => {
7430 assert!(is_same_node(h, &node));
7431
7432 t.clone()
7433 }
7434 ActiveFormattingElement::Marker => {
7435 panic!("Found marker during adoption agency")
7436 }
7437 };
7438 let new_element = self.create_element_for_token(
7439 token_and_info.token.clone(),
7440 token_and_info.span,
7441 Some(Namespace::HTML),
7442 None,
7443 );
7444
7445 self.active_formatting_elements.items[node_formatting_index] =
7446 ActiveFormattingElement::Element(new_element.clone(), token_and_info);
7447 self.open_elements_stack
7448 .replace(node_index, new_element.clone());
7449
7450 node = new_element;
7451
7452 // 13.7
7453 if is_same_node(&last_node, &furthest_block.1) {
7454 bookmark = Bookmark::InsertAfter(node.clone());
7455 }
7456
7457 // 13.8
7458 if let Some((parent, i)) = self.get_parent_and_index(&last_node) {
7459 parent.children.borrow_mut().remove(i);
7460 last_node.parent.set(None);
7461 }
7462
7463 self.append_node(&node, last_node);
7464
7465 // 13.9
7466 last_node = node;
7467 }
7468
7469 // 14.
7470 if let Some((parent, i)) = self.get_parent_and_index(&last_node) {
7471 parent.children.borrow_mut().remove(i);
7472
7473 last_node.parent.set(None);
7474 }
7475
7476 let appropriate_place =
7477 self.get_appropriate_place_for_inserting_node(Some(common_ancestor))?;
7478
7479 self.insert_at_position(appropriate_place, last_node.clone());
7480
7481 // 15.
7482 let start_span = match &furthest_block.1.children.borrow().first() {
7483 Some(first) => first.start_span.borrow().lo,
7484 _ => token_and_info.span.lo(),
7485 };
7486 let new_element = self.create_element_for_token(
7487 formatting_element.2.token.clone(),
7488 Span::new(start_span, token_and_info.span.hi()),
7489 Some(Namespace::HTML),
7490 None,
7491 );
7492
7493 // 16.
7494 self.reparent_children(&furthest_block.1, &new_element);
7495
7496 // 17.
7497 self.append_node(&furthest_block.1, new_element.clone());
7498
7499 // 18.
7500 match bookmark {
7501 Bookmark::Replace(to_replace) => {
7502 let index = self
7503 .active_formatting_elements
7504 .get_position(&to_replace)
7505 .expect("bookmark not found in active formatting elements");
7506
7507 self.active_formatting_elements.items[index] =
7508 ActiveFormattingElement::Element(new_element.clone(), formatting_element.2);
7509 }
7510 Bookmark::InsertAfter(previous) => {
7511 let index = self
7512 .active_formatting_elements
7513 .get_position(&previous)
7514 .expect("bookmark not found in active formatting elements")
7515 + 1;
7516
7517 self.active_formatting_elements.items.insert(
7518 index,
7519 ActiveFormattingElement::Element(new_element.clone(), formatting_element.2),
7520 );
7521
7522 let old_index = self
7523 .active_formatting_elements
7524 .get_position(&formatting_element.1)
7525 .expect("formatting element not found in active formatting elements");
7526
7527 self.active_formatting_elements.items.remove(old_index);
7528 }
7529 }
7530
7531 // 19.
7532 self.open_elements_stack.remove(&formatting_element.1);
7533
7534 let new_furthest_block_index = self
7535 .open_elements_stack
7536 .items
7537 .iter()
7538 .position(|n| is_same_node(n, &furthest_block.1))
7539 .expect("furthest block missing from open element stack");
7540
7541 self.open_elements_stack
7542 .insert(new_furthest_block_index + 1, new_element);
7543 }
7544 }
7545
7546 fn reparent_children(&mut self, node: &RcNode, new_parent: &RcNode) {
7547 let mut children = node.children.borrow_mut();
7548 let mut new_children = new_parent.children.borrow_mut();
7549
7550 for child in children.iter() {
7551 let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
7552
7553 // It is possible when new created element doesn't have parent
7554 if let Some(previous_parent) = previous_parent {
7555 assert!(is_same_node(
7556 node,
7557 &previous_parent.upgrade().expect("dangling weak")
7558 ));
7559 }
7560 }
7561
7562 new_children.extend(std::mem::take(&mut *children));
7563 }
7564
7565 // When the steps below require the UA to reconstruct the active formatting
7566 // elements, the UA must perform the following steps:
7567 //
7568 // 1. If there are no entries in the list of active formatting elements, then
7569 // there is nothing to reconstruct; stop this algorithm.
7570 //
7571 // 2. If the last (most recently added) entry in the list of active formatting
7572 // elements is a marker, or if it is an element that is in the stack of open
7573 // elements, then there is nothing to reconstruct; stop this algorithm.
7574 //
7575 // 3. Let entry be the last (most recently added) element in the list of active
7576 // formatting elements.
7577 //
7578 // 4. Rewind: If there are no entries before entry in the list of active
7579 // formatting elements, then jump to the step labeled create.
7580 //
7581 // 5. Let entry be the entry one earlier than entry in the list of active
7582 // formatting elements.
7583 //
7584 // 6. If entry is neither a marker nor an element that is also in the stack of
7585 // open elements, go to the step labeled rewind.
7586 //
7587 // 7. Advance: Let entry be the element one later than entry in the list of
7588 // active formatting elements.
7589 //
7590 // 8. Create: Insert an HTML element for the token for which the element entry
7591 // was created, to obtain new element.
7592 //
7593 // 9. Replace the entry for entry in the list with an entry for new element.
7594 //
7595 // 10. If the entry for new element in the list of active formatting elements is
7596 // not the last entry in the list, return to the step labeled advance.
7597 //
7598 // This has the effect of reopening all the formatting elements that were opened
7599 // in the current body, cell, or caption (whichever is youngest) that haven't
7600 // been explicitly closed.
7601 fn reconstruct_active_formatting_elements(&mut self) -> PResult<()> {
7602 let last = match self.active_formatting_elements.items.last() {
7603 None => {
7604 return Ok(());
7605 }
7606 Some(x) => x,
7607 };
7608
7609 if self.is_marker_or_open(last) {
7610 return Ok(());
7611 }
7612
7613 let mut entry_index = self.active_formatting_elements.items.len() - 1;
7614
7615 loop {
7616 if entry_index == 0 {
7617 break;
7618 }
7619
7620 entry_index -= 1;
7621
7622 if self.is_marker_or_open(&self.active_formatting_elements.items[entry_index]) {
7623 entry_index += 1;
7624
7625 break;
7626 }
7627 }
7628
7629 loop {
7630 let token_and_info = match self.active_formatting_elements.items[entry_index] {
7631 ActiveFormattingElement::Element(_, ref t) => t.clone(),
7632 ActiveFormattingElement::Marker => {
7633 panic!("Found marker during formatting element reconstruction")
7634 }
7635 };
7636
7637 let new_element = self.insert_html_element(&token_and_info)?;
7638
7639 self.active_formatting_elements.items[entry_index] =
7640 ActiveFormattingElement::Element(new_element, token_and_info);
7641
7642 if entry_index == self.active_formatting_elements.items.len() - 1 {
7643 break Ok(());
7644 }
7645
7646 entry_index += 1;
7647 }
7648 }
7649
7650 fn is_marker_or_open(&self, entry: &ActiveFormattingElement) -> bool {
7651 match *entry {
7652 ActiveFormattingElement::Marker => true,
7653 ActiveFormattingElement::Element(ref node, _) => self
7654 .open_elements_stack
7655 .items
7656 .iter()
7657 .rev()
7658 .any(|n| is_same_node(n, node)),
7659 }
7660 }
7661
7662 fn create_fake_html_element(&self) -> RcNode {
7663 Node::new(
7664 Data::Element {
7665 tag_name: atom!("html"),
7666 namespace: Namespace::HTML,
7667 attributes: RefCell::new(Vec::new()),
7668 is_self_closing: false,
7669 },
7670 DUMMY_SP,
7671 )
7672 }
7673
7674 fn create_fake_token_and_info(&self, tag_name: &str, span: Option<Span>) -> TokenAndInfo {
7675 TokenAndInfo {
7676 span: match span {
7677 Some(span) => span,
7678 _ => DUMMY_SP,
7679 },
7680 acknowledged: false,
7681 token: Token::StartTag {
7682 tag_name: tag_name.into(),
7683 raw_tag_name: None,
7684 is_self_closing: false,
7685 attributes: Vec::new(),
7686 },
7687 }
7688 }
7689
7690 // Parsing elements that contain only text
7691 // The generic raw text element parsing algorithm and the generic RCDATA element
7692 // parsing algorithm consist of the following steps. These algorithms are always
7693 // invoked in response to a start tag token.
7694 fn parse_generic_text_element(
7695 &mut self,
7696 token_and_info: &TokenAndInfo,
7697 is_raw_text_element_algorithm: bool,
7698 ) -> PResult<()> {
7699 // Insert an HTML element for the token.
7700 self.insert_html_element(token_and_info)?;
7701
7702 // If the algorithm that was invoked is the generic raw text element
7703 // parsing algorithm, switch the tokenizer to the RAWTEXT state;
7704 // otherwise the algorithm invoked was the generic RCDATA element
7705 // parsing algorithm, switch the tokenizer to the RCDATA state.
7706 if is_raw_text_element_algorithm {
7707 self.input.set_input_state(State::Rawtext);
7708 } else {
7709 self.input.set_input_state(State::Rcdata);
7710 }
7711
7712 // Let the original insertion mode be the current insertion mode.
7713 self.original_insertion_mode = self.insertion_mode.clone();
7714 // Then, switch the insertion mode to "text".
7715 self.insertion_mode = InsertionMode::Text;
7716
7717 Ok(())
7718 }
7719
7720 fn close_p_element(&mut self, token_and_info: &TokenAndInfo, is_close_p: bool) {
7721 // When the steps above say the user agent is to close a p element, it means
7722 // that the user agent must run the following steps:
7723
7724 // 1. Generate implied end tags, except for p elements.
7725 self.open_elements_stack
7726 .generate_implied_end_tags_with_exclusion("p");
7727
7728 // 2. If the current node is not a p element, then this is a parse error.
7729 match self.open_elements_stack.items.last() {
7730 Some(node) if !is_html_element!(node, "p") => {
7731 let tag_name = match &token_and_info.token {
7732 Token::StartTag { tag_name, .. } | Token::EndTag { tag_name, .. } => {
7733 tag_name.clone()
7734 }
7735 _ => {
7736 unreachable!();
7737 }
7738 };
7739
7740 self.errors.push(Error::new(
7741 token_and_info.span,
7742 ErrorKind::UnclosedElementsImplied(tag_name),
7743 ));
7744 }
7745 _ => {}
7746 }
7747
7748 // 3. Pop elements from the stack of open elements until a p element has been
7749 // popped from the stack.
7750 let popped = self.open_elements_stack.pop_until_tag_name_popped(&["p"]);
7751
7752 if is_close_p {
7753 self.update_end_tag_span(popped.as_ref(), token_and_info.span)
7754 }
7755 }
7756
7757 fn close_the_cell(&mut self) {
7758 // Generate implied end tags.
7759 self.open_elements_stack.generate_implied_end_tags();
7760
7761 // If the current node is not now a td element or a th element, then this is a
7762 // parse error.
7763 match self.open_elements_stack.items.last() {
7764 Some(node) if !is_html_element!(node, "td" | "th") => {
7765 self.errors.push(Error::new(
7766 *node.start_span.borrow(),
7767 ErrorKind::UnclosedElementsCell,
7768 ));
7769 }
7770 _ => {}
7771 }
7772
7773 // Pop elements from the stack of open elements stack until a td
7774 // element or a th element has been popped from the stack.
7775 self.open_elements_stack
7776 .pop_until_tag_name_popped(&["td", "th"]);
7777
7778 // Clear the list of active formatting elements up to the last marker.
7779 self.active_formatting_elements.clear_to_last_marker();
7780
7781 // Switch the insertion mode to "in row".
7782 self.insertion_mode = InsertionMode::InRow;
7783
7784 // NOTE: The stack of open elements cannot have both a td and a th
7785 // element in table scope at the same time, nor can it have neither
7786 // when the close the cell algorithm is invoked.
7787 }
7788
7789 fn reset_insertion_mode(&mut self) {
7790 // 1. Let last be false.
7791 let mut last = false;
7792
7793 let mut iter = self.open_elements_stack.items.iter().rev();
7794 let first = self.open_elements_stack.items.first();
7795
7796 // 2. Let node be the last node in the stack of open elements.
7797 while let Some(mut inner_node) = iter.next() {
7798 // 3. Loop: If node is the first node in the stack of open elements, then set
7799 // last to true, and, if the parser was created as part of the HTML fragment
7800 // parsing algorithm (fragment case), set node to the context element passed to
7801 // that algorithm.
7802 if let Some(first) = first {
7803 if is_same_node(first, inner_node) {
7804 last = true;
7805
7806 if self.is_fragment_case {
7807 // Fragment case
7808 if let Some(context_element) = &self.context_element {
7809 inner_node = context_element;
7810 }
7811 }
7812 }
7813 }
7814
7815 // Optimization - logic below only works with HTML namespaces, so we can skip
7816 // extra checks
7817 if get_namespace!(inner_node) != Namespace::HTML {
7818 if last {
7819 self.insertion_mode = InsertionMode::InBody;
7820
7821 return;
7822 }
7823
7824 continue;
7825 }
7826
7827 let tag_name = get_tag_name!(inner_node);
7828
7829 // 4. If node is a select element, run these substeps:
7830 //
7831 // 1. If last is true, jump to the step below labeled done.
7832 //
7833 // 2. Let ancestor be node.
7834 //
7835 // 3. Loop: If ancestor is the first node in the stack of open elements, jump
7836 // to the step below labeled done.
7837 //
7838 // 4. Let ancestor be the node before ancestor in the stack of open elements.
7839 //
7840 // 5. If ancestor is a template node, jump to the step below labeled done.
7841 //
7842 // 6. If ancestor is a table node, switch the insertion mode to "in select in
7843 // table" and return.
7844 //
7845 // 7. Jump back to the step labeled loop.
7846 //
7847 // 8. Done: Switch the insertion mode to "in select" and return.
7848 if tag_name == "select" {
7849 if !last {
7850 let mut ancestor = Some(inner_node);
7851
7852 while ancestor.is_some() {
7853 if let Some(ancestor) = ancestor {
7854 if let Some(first) = first {
7855 if is_same_node(ancestor, first) {
7856 break;
7857 }
7858 }
7859 }
7860
7861 ancestor = iter.next();
7862
7863 if let Some(ancestor) = ancestor {
7864 if is_html_element!(ancestor, "template") {
7865 break;
7866 } else if is_html_element!(ancestor, "table") {
7867 self.insertion_mode = InsertionMode::InSelectInTable;
7868
7869 return;
7870 }
7871 }
7872 }
7873 }
7874
7875 self.insertion_mode = InsertionMode::InSelect;
7876
7877 return;
7878 }
7879
7880 // 5. If node is a td or th element and last is false, then switch the insertion
7881 // mode to "in cell" and return.
7882 if (tag_name == "td" || tag_name == "th") && !last {
7883 self.insertion_mode = InsertionMode::InCell;
7884
7885 return;
7886 }
7887
7888 // 6. If node is a tr element, then switch the insertion mode to "in row" and
7889 // return.
7890 if tag_name == "tr" {
7891 self.insertion_mode = InsertionMode::InRow;
7892
7893 return;
7894 }
7895
7896 // 7. If node is a tbody, thead, or tfoot element, then switch the insertion
7897 // mode to "in table body" and return.
7898 if tag_name == "tbody" || tag_name == "thead" || tag_name == "tfoot" {
7899 self.insertion_mode = InsertionMode::InTableBody;
7900
7901 return;
7902 }
7903
7904 // 8. If node is a caption element, then switch the insertion mode to "in
7905 // caption" and return.
7906 if tag_name == "caption" {
7907 self.insertion_mode = InsertionMode::InCaption;
7908
7909 return;
7910 }
7911
7912 // 9. If node is a colgroup element, then switch the insertion mode to "in
7913 // column group" and return.
7914 if tag_name == "colgroup" {
7915 self.insertion_mode = InsertionMode::InColumnGroup;
7916
7917 return;
7918 }
7919
7920 // // 10. If node is a table element, then switch the insertion mode to "in
7921 // table" and return.
7922 if tag_name == "table" {
7923 self.insertion_mode = InsertionMode::InTable;
7924
7925 return;
7926 }
7927
7928 // 11. If node is a template element, then switch the insertion mode to the
7929 // current template insertion mode and return.
7930 if tag_name == "template" {
7931 if let Some(last) = self.template_insertion_mode_stack.last() {
7932 self.insertion_mode = last.clone();
7933
7934 return;
7935 }
7936 }
7937
7938 // 12. If node is a head element and last is false, then switch the insertion
7939 // mode to "in head" and return.
7940 if tag_name == "head" && !last {
7941 self.insertion_mode = InsertionMode::InHead;
7942
7943 return;
7944 }
7945
7946 // 13. If node is a body element, then switch the insertion mode to "in body"
7947 // and return.
7948 if tag_name == "body" {
7949 self.insertion_mode = InsertionMode::InBody;
7950
7951 return;
7952 }
7953
7954 // 14. If node is a frameset element, then switch the insertion mode to "in
7955 // frameset" and return. (fragment case)
7956 if tag_name == "frameset" {
7957 self.insertion_mode = InsertionMode::InFrameset;
7958
7959 return;
7960 }
7961
7962 // 15. If node is an html element, run these substeps:
7963 //
7964 // 1. If the head element pointer is null, switch the insertion mode to
7965 // "before head" and return. (fragment case)
7966 //
7967 // 2. Otherwise, the head element pointer is not null, switch the insertion
7968 // mode to "after head" and return.
7969 if tag_name == "html" {
7970 if self.head_element_pointer.is_none() {
7971 // Fragment case
7972 self.insertion_mode = InsertionMode::BeforeHead;
7973 } else {
7974 self.insertion_mode = InsertionMode::AfterHead;
7975 }
7976
7977 return;
7978 }
7979
7980 // 16. If last is true, then switch the insertion mode to "in body" and return.
7981 // (fragment case)
7982 if last {
7983 self.insertion_mode = InsertionMode::InBody;
7984
7985 return;
7986 }
7987
7988 // 17. Let node now be the node before node in the stack of open
7989 // elements.
7990 //
7991 // 18. Return to the step labeled loop.
7992 }
7993 }
7994
7995 fn set_document_mode(&mut self, document_mode: DocumentMode) {
7996 if let Some(document) = &self.document {
7997 match &document.data {
7998 Data::Document { mode, .. } => {
7999 let mut mode = mode.borrow_mut();
8000
8001 *mode = document_mode;
8002 }
8003 _ => {
8004 unreachable!();
8005 }
8006 }
8007 }
8008 }
8009
8010 fn is_special_element(&self, node: &RcNode) -> bool {
8011 if is_html_element!(
8012 node,
8013 "address"
8014 | "applet"
8015 | "area"
8016 | "article"
8017 | "aside"
8018 | "base"
8019 | "basefont"
8020 | "bgsound"
8021 | "blockquote"
8022 | "body"
8023 | "br"
8024 | "button"
8025 | "caption"
8026 | "center"
8027 | "col"
8028 | "colgroup"
8029 | "dd"
8030 | "details"
8031 | "dir"
8032 | "div"
8033 | "dl"
8034 | "dt"
8035 | "embed"
8036 | "fieldset"
8037 | "figcaption"
8038 | "figure"
8039 | "footer"
8040 | "form"
8041 | "frame"
8042 | "frameset"
8043 | "h1"
8044 | "h2"
8045 | "h3"
8046 | "h4"
8047 | "h5"
8048 | "h6"
8049 | "head"
8050 | "header"
8051 | "hgroup"
8052 | "hr"
8053 | "html"
8054 | "iframe"
8055 | "img"
8056 | "input"
8057 | "keygen"
8058 | "li"
8059 | "link"
8060 | "listing"
8061 | "main"
8062 | "marquee"
8063 | "menu"
8064 | "meta"
8065 | "nav"
8066 | "noembed"
8067 | "noframes"
8068 | "noscript"
8069 | "object"
8070 | "ol"
8071 | "p"
8072 | "param"
8073 | "plaintext"
8074 | "pre"
8075 | "script"
8076 | "section"
8077 | "select"
8078 | "source"
8079 | "style"
8080 | "summary"
8081 | "table"
8082 | "tbody"
8083 | "td"
8084 | "template"
8085 | "textarea"
8086 | "tfoot"
8087 | "th"
8088 | "thead"
8089 | "title"
8090 | "tr"
8091 | "track"
8092 | "ul"
8093 | "wbr"
8094 | "xmp"
8095 ) || is_mathml_element!(node, "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml")
8096 || is_svg_element!(node, "title" | "foreignObject" | "desc")
8097 {
8098 return true;
8099 }
8100
8101 false
8102 }
8103
8104 // While the parser is processing a token, it can enable or disable foster
8105 // parenting. This affects the following algorithm.
8106 //
8107 // The appropriate place for inserting a node, optionally using a particular
8108 // override target, is the position in an element returned by running the
8109 // following steps:
8110 //
8111 // 1. If there was an override target specified, then let target be the override
8112 // target.
8113 //
8114 // Otherwise, let target be the current node.
8115 //
8116 // 2. Determine the adjusted insertion location using the first matching steps
8117 // from the following list:
8118 //
    // If foster parenting is enabled and target is a table, tbody, tfoot, thead, or
    // tr element:
    //
    // Foster parenting happens when content is misnested in tables.
8121 //
8122 // Run these substeps:
8123 //
8124 // 1. Let last template be the last template element in the stack of open
8125 // elements, if any.
8126 //
8127 // 2. Let last table be the last table element in the stack of open elements, if
8128 // any.
8129 //
8130 // 3. If there is a last template and either there is no last table, or there is
8131 // one, but last template is lower (more recently added) than last table in the
8132 // stack of open elements, then: let adjusted insertion location be inside last
8133 // template's template contents, after its last child (if any), and abort these
8134 // steps.
8135 //
8136 // 4. If there is no last table, then let adjusted insertion location be inside
8137 // the first element in the stack of open elements (the html element), after
8138 // its last child (if any), and abort these steps. (fragment case)
8139 //
8140 // 5. If last table has a parent node, then let adjusted insertion location be
8141 // inside last table's parent node, immediately before last table, and abort
8142 // these steps.
8143 //
8144 // 6. Let previous element be the element immediately above last table in the
8145 // stack of open elements.
8146 //
8147 // 7. Let adjusted insertion location be inside previous element, after its last
8148 // child (if any).
8149 //
8150 // These steps are involved in part because it's possible for elements, the
8151 // table element in this case in particular, to have been moved by a script
8152 // around in the DOM, or indeed removed from the DOM entirely, after the element
8153 // was inserted by the parser.
8154 //
8155 // Otherwise
8156 // Let adjusted insertion location be inside target, after its last child (if
8157 // any).
8158 //
8159 // 3. If the adjusted insertion location is inside a template element, let it
8160 // instead be inside the template element's template contents, after its last
8161 // child (if any).
8162 //
8163 // 4. Return the adjusted insertion location.
8164 fn get_appropriate_place_for_inserting_node(
8165 &mut self,
8166 override_target: Option<RcNode>,
8167 ) -> PResult<InsertionPosition> {
8168 // 1.
8169 let target = override_target.unwrap_or_else(|| {
8170 if let Some(last) = self.open_elements_stack.items.last() {
8171 last.clone()
8172 } else {
8173 // Unreachable, because we always have `html` element on top
8174 unreachable!();
8175 }
8176 });
8177
8178 // 2.
8179 let mut adjusted_insertion_location = if self.foster_parenting_enabled
8180 && is_html_element!(target, "table" | "tbody" | "tfoot" | "thead" | "tr")
8181 {
8182 // 2.1
8183 let mut last_template = None;
8184 let mut last_template_index = 0;
8185
8186 // 2.2
8187 let mut last_table = None;
8188 let mut last_table_index = 0;
8189
8190 for (i, node) in self.open_elements_stack.items.iter().enumerate().rev() {
8191 if is_html_element!(node, "template") && last_template.is_none() {
8192 last_template = Some(node);
8193 last_template_index = i;
8194
8195 if last_table.is_some() {
8196 break;
8197 }
8198 } else if is_html_element!(node, "table") && last_table.is_none() {
8199 last_table = Some(node);
8200 last_table_index = i;
8201
8202 if last_template.is_some() {
8203 break;
8204 }
8205 }
8206 }
8207
8208 // 2.3
8209 if (last_table.is_none()
8210 || (last_table.is_some() && last_template_index > last_table_index))
8211 && last_template.is_some()
8212 {
8213 let last_template = if let Some(last_template) = last_template {
8214 last_template.clone()
8215 } else {
8216 unreachable!();
8217 };
8218
8219 InsertionPosition::LastChild(last_template)
8220 }
8221 // 2.4
8222 // Fragment case
8223 else if last_table.is_none() && !self.open_elements_stack.items.is_empty() {
8224 let first = if let Some(first) = self.open_elements_stack.items.first() {
8225 first.clone()
8226 } else {
8227 unreachable!();
8228 };
8229
8230 InsertionPosition::LastChild(first)
8231 }
8232 // 2.5
8233 else if match last_table {
8234 Some(last_table) => {
8235 let parent = last_table.parent.take();
8236 let has_parent = parent.is_some();
8237
8238 last_table.parent.set(parent);
8239
8240 has_parent
8241 }
8242 _ => false,
8243 } {
8244 let sibling =
8245 if let Some(sibling) = self.open_elements_stack.items.get(last_table_index) {
8246 sibling.clone()
8247 } else {
8248 unreachable!()
8249 };
8250
8251 InsertionPosition::BeforeSibling(sibling)
8252 } else {
8253 // 2.6
8254 let previous_element = if let Some(previous_element) =
8255 self.open_elements_stack.items.get(last_table_index - 1)
8256 {
8257 previous_element.clone()
8258 } else {
8259 unreachable!()
8260 };
8261
8262 // 2.7
8263 InsertionPosition::LastChild(previous_element)
8264 }
8265 } else {
8266 InsertionPosition::LastChild(target)
8267 };
8268
8269 // 3.
8270 adjusted_insertion_location = match &adjusted_insertion_location {
8271 InsertionPosition::LastChild(node) | InsertionPosition::BeforeSibling(node) => {
8272 match &node.data {
8273 Data::Element {
8274 namespace,
8275 tag_name,
8276 ..
8277 } if tag_name == "template" && *namespace == Namespace::HTML => {
8278 adjusted_insertion_location
8279 }
8280 _ => adjusted_insertion_location,
8281 }
8282 }
8283 };
8284
8285 // 4.
8286 Ok(adjusted_insertion_location)
8287 }
8288
8289 // Inserts a comment node in to the document while processing a comment token.
8290 fn insert_comment(&mut self, token_and_info: &mut TokenAndInfo) -> PResult<()> {
8291 // Let data be the data given in the comment token being processed.
8292 // If position was specified, then let the adjusted insertion location
8293 // be position. Otherwise, let adjusted insertion location be the
8294 // appropriate place for inserting a node.
8295 let adjusted_insertion_location = self.get_appropriate_place_for_inserting_node(None)?;
8296
8297 // Create a Comment node whose data attribute is set to data and whose
8298 // node document is the same as that of the node in which the adjusted
8299 // insertion location finds itself.
8300 let (data, raw) = match &token_and_info.token {
8301 Token::Comment { data, raw } => (data.clone(), raw.clone()),
8302 _ => {
8303 unreachable!()
8304 }
8305 };
8306
8307 let comment = Node::new(Data::Comment { data, raw }, token_and_info.span);
8308
8309 // Insert the newly created node at the adjusted insertion location.
8310 self.insert_at_position(adjusted_insertion_location, comment);
8311
8312 Ok(())
8313 }
8314
8315 fn insert_comment_as_last_child_of_document(
8316 &mut self,
8317 token_and_info: &mut TokenAndInfo,
8318 ) -> PResult<()> {
8319 let (data, raw) = match &token_and_info.token {
8320 Token::Comment { data, raw } => (data.clone(), raw.clone()),
8321 _ => {
8322 unreachable!()
8323 }
8324 };
8325
8326 let comment = Node::new(Data::Comment { data, raw }, token_and_info.span);
8327
8328 if let Some(document) = &self.document {
8329 self.append_node(document, comment);
8330 }
8331
8332 Ok(())
8333 }
8334
8335 fn insert_comment_as_last_child_of_first_element(
8336 &mut self,
8337 token_and_info: &mut TokenAndInfo,
8338 ) -> PResult<()> {
8339 let (data, raw) = match &token_and_info.token {
8340 Token::Comment { data, raw } => (data.clone(), raw.clone()),
8341 _ => {
8342 unreachable!()
8343 }
8344 };
8345
8346 let comment = Node::new(Data::Comment { data, raw }, token_and_info.span);
8347
8348 if let Some(html) = &self.open_elements_stack.items.first() {
8349 self.append_node(html, comment);
8350 }
8351
8352 Ok(())
8353 }
8354
8355 // Inserts a sequence of characters in to a preexisting text node or creates
8356 // a new text node if one does not exist in the expected insertion location.
8357 fn insert_character(&mut self, token_and_info: &mut TokenAndInfo) -> PResult<()> {
8358 // Let data be the characters passed to the algorithm, or, if no
8359 // characters were explicitly specified, the character of the character
8360 // token being processed.
8361
8362 // Let the adjusted insertion location be the appropriate place for
8363 // inserting a node.
8364 let adjusted_insertion_location = self.get_appropriate_place_for_inserting_node(None)?;
8365
8366 // If the adjusted insertion location is in a Document node, then abort
8367 // these steps.
8368 // NOTE: The DOM will not let Document nodes have Text node children, so
8369 // they are dropped on the floor.
8370 // Note: we don't use document in stack elements, so we can't have Document here
8371
8372 // If there is a Text node immediately before the adjusted insertion location,
8373 // then append data to that Text node's data. Otherwise, create
8374 // a new Text node whose data is data and whose node document is the
8375 // same as that of the element in which the adjusted insertion location
8376 // finds itself, and insert the newly created node at the adjusted
8377 // insertion location.
8378 match &adjusted_insertion_location {
8379 InsertionPosition::LastChild(parent) => {
8380 let children = parent.children.borrow();
8381
8382 if let Some(last) = children.last() {
8383 if let Data::Text {
8384 data,
8385 raw: raw_data,
8386 } = &last.data
8387 {
8388 match &token_and_info.token {
8389 Token::Character {
8390 value: c,
8391 raw: raw_c,
8392 } => {
8393 data.borrow_mut().push(*c);
8394
8395 if let Some(Raw::Same) = raw_c {
8396 raw_data.borrow_mut().push(*c);
8397 } else if let Some(Raw::Atom(raw_c)) = raw_c {
8398 raw_data.borrow_mut().push_str(raw_c);
8399 }
8400 }
8401 _ => {
8402 unreachable!();
8403 }
8404 }
8405
8406 let mut span = last.end_span.borrow_mut();
8407
8408 *span = Some(token_and_info.span);
8409
8410 return Ok(());
8411 }
8412 }
8413 }
8414 InsertionPosition::BeforeSibling(node) => {
8415 if let Some((parent, i)) = self.get_parent_and_index(node) {
8416 if i > 0 {
8417 let children = parent.children.borrow();
8418
8419 if let Some(previous) = children.get(i - 1) {
8420 if let Data::Text {
8421 data,
8422 raw: raw_data,
8423 } = &previous.data
8424 {
8425 match &token_and_info.token {
8426 Token::Character {
8427 value: c,
8428 raw: raw_c,
8429 } => {
8430 data.borrow_mut().push(*c);
8431
8432 if let Some(Raw::Same) = raw_c {
8433 raw_data.borrow_mut().push(*c);
8434 } else if let Some(Raw::Atom(raw_c)) = raw_c {
8435 raw_data.borrow_mut().push_str(raw_c);
8436 }
8437 }
8438 _ => {
8439 unreachable!();
8440 }
8441 }
8442
8443 let mut span = previous.end_span.borrow_mut();
8444
8445 *span = Some(token_and_info.span);
8446
8447 return Ok(());
8448 }
8449 }
8450 }
8451 }
8452 }
8453 }
8454
8455 // Otherwise, create a new Text node whose data is data and whose node document
8456 // is the same as that of the element in which the adjusted insertion location
8457 // finds itself, and insert the newly created node at the adjusted insertion
8458 // location.
8459 let (data, raw) = match &token_and_info.token {
8460 Token::Character {
8461 value: c,
8462 raw: raw_c,
8463 } => {
8464 let mut data = String::with_capacity(64);
8465
8466 data.push(*c);
8467
8468 let mut raw = String::with_capacity(64);
8469
8470 if let Some(Raw::Same) = raw_c {
8471 raw.push(*c);
8472 } else if let Some(Raw::Atom(raw_c)) = raw_c {
8473 raw.push_str(raw_c);
8474 }
8475
8476 (RefCell::new(data), RefCell::new(raw))
8477 }
8478 _ => {
8479 unreachable!()
8480 }
8481 };
8482
8483 let text = Node::new(Data::Text { data, raw }, token_and_info.span);
8484
8485 self.insert_at_position(adjusted_insertion_location, text);
8486
8487 Ok(())
8488 }
8489
8490 fn insert_html_element(&mut self, token_and_info: &TokenAndInfo) -> PResult<RcNode> {
8491 self.insert_foreign_element(token_and_info, Namespace::HTML, None)
8492 }
8493
8494 fn insert_foreign_element(
8495 &mut self,
8496 token_and_info: &TokenAndInfo,
8497 namespace: Namespace,
8498 adjust_attributes: Option<AdjustAttributes>,
8499 ) -> PResult<RcNode> {
8500 // Let the adjusted insertion location be the appropriate place for
8501 // inserting a node.
8502 let adjusted_insertion_location = self.get_appropriate_place_for_inserting_node(None)?;
8503
8504 // Create an element for the token in the given namespace, with the
8505 // intended parent being the element in which the adjusted insertion
8506 // location finds itself.
8507 let node = self.create_element_for_token(
8508 token_and_info.token.clone(),
8509 token_and_info.span,
8510 Some(namespace),
8511 adjust_attributes,
8512 );
8513
8514 // If it is possible to insert an element at the adjusted insertion
8515 // location, then insert the newly created element at the adjusted
8516 // insertion location.
8517 // NOTE: If the adjusted insertion location cannot accept more
8518 // elements, e.g. because it's a Document that already has an
8519 // element child, then the newly created element is dropped on the
8520 // floor.
8521 self.insert_at_position(adjusted_insertion_location, node.clone());
8522
8523 // Push the element onto the stack of open elements so that it is the
8524 // new current node.
8525 self.open_elements_stack.push(node.clone());
8526
8527 // Return the newly created element.
8528 Ok(node)
8529 }
8530
8531 fn append_node(&self, parent: &RcNode, child: RcNode) {
8532 let previous_parent = child.parent.replace(Some(Rc::downgrade(parent)));
8533
8534 // Invariant: child cannot have existing parent
8535 assert!(previous_parent.is_none());
8536
8537 parent.children.borrow_mut().push(child);
8538 }
8539
8540 fn get_parent_and_index(&self, node: &RcNode) -> Option<(RcNode, usize)> {
8541 if let Some(weak) = node.parent.take() {
8542 let parent = weak.upgrade().expect("dangling weak pointer");
8543
8544 node.parent.set(Some(weak));
8545
8546 let i = match parent
8547 .children
8548 .borrow()
8549 .iter()
8550 .enumerate()
8551 .find(|&(_, child)| is_same_node(child, node))
8552 {
8553 Some((i, _)) => i,
8554 None => {
8555 // Unreachable, otherwise node has a parent but couldn't found in parent's
8556 // children
8557 unreachable!();
8558 }
8559 };
8560 Some((parent, i))
8561 } else {
8562 None
8563 }
8564 }
8565
8566 fn append_node_before_sibling(&self, parent: &RcNode, child: RcNode) {
8567 let (parent, i) = self
8568 .get_parent_and_index(parent)
8569 .expect("append_node_before_sibling called on node without parent");
8570
8571 if let Some((parent, i)) = self.get_parent_and_index(&child) {
8572 parent.children.borrow_mut().remove(i);
8573
8574 child.parent.set(None);
8575 }
8576
8577 child.parent.set(Some(Rc::downgrade(&parent)));
8578 parent.children.borrow_mut().insert(i, child);
8579 }
8580
8581 fn insert_at_position(&mut self, insertion_point: InsertionPosition, node: RcNode) {
8582 match insertion_point {
8583 InsertionPosition::LastChild(parent) => {
8584 self.append_node(&parent, node);
8585 }
8586 InsertionPosition::BeforeSibling(sibling) => {
8587 self.append_node_before_sibling(&sibling, node)
8588 }
8589 }
8590 }
8591
8592 fn update_end_tag_span(&self, node: Option<&RcNode>, span: Span) {
8593 if let Some(node) = node {
8594 if node.start_span.borrow().is_dummy() {
8595 return;
8596 }
8597
8598 let mut end_tag_span = node.end_span.borrow_mut();
8599
8600 *end_tag_span = Some(span);
8601 }
8602 }
8603}
8604
8605fn is_same_node(a: &RcNode, b: &RcNode) -> bool {
8606 Rc::ptr_eq(a, b)
8607}
8608
8609// The HTML namespace is "http://www.w3.org/1999/xhtml".
8610fn is_element_in_html_namespace(node: Option<&RcNode>) -> bool {
8611 if let Some(node) = node {
8612 match &node.data {
8613 Data::Element { namespace, .. } if *namespace == Namespace::HTML => {
8614 return true;
8615 }
8616 _ => {
8617 return false;
8618 }
8619 }
8620 }
8621
8622 false
8623}
8624
8625// A node is a MathML text integration point if it is one of the following
8626// elements:
8627//
8628// A MathML mi element
8629// A MathML mo element
8630// A MathML mn element
8631// A MathML ms element
8632// A MathML mtext element
8633fn is_mathml_text_integration_point(node: Option<&RcNode>) -> bool {
8634 if let Some(node) = node {
8635 match &node.data {
8636 Data::Element {
8637 namespace,
8638 tag_name,
8639 ..
8640 } if *namespace == Namespace::MATHML
8641 && matches!(&**tag_name, "mi" | "mo" | "mn" | "ms" | "mtext") =>
8642 {
8643 return true;
8644 }
8645 _ => {
8646 return false;
8647 }
8648 }
8649 }
8650
8651 false
8652}
8653
8654fn is_mathml_annotation_xml(node: Option<&RcNode>) -> bool {
8655 if let Some(node) = node {
8656 match &node.data {
8657 Data::Element {
8658 namespace,
8659 tag_name,
8660 ..
8661 } if *namespace == Namespace::MATHML && tag_name == "annotation-xml" => {
8662 return true;
8663 }
8664 _ => {
8665 return false;
8666 }
8667 }
8668 }
8669
8670 false
8671}
8672
8673// A node is an HTML integration point if it is one of the following elements:
8674//
8675// A MathML annotation-xml element whose start tag token had an attribute with
8676// the name "encoding" whose value was an ASCII case-insensitive match for the
8677// string "text/html" A MathML annotation-xml element whose start tag token
8678// had an attribute with the name "encoding" whose value was an ASCII
8679// case-insensitive match for the string "application/xhtml+xml"
8680// An SVG foreignObject element
8681// An SVG desc element
8682// An SVG title element
8683fn is_html_integration_point(node: Option<&RcNode>) -> bool {
8684 if let Some(node) = node {
8685 match &node.data {
8686 Data::Element {
8687 namespace,
8688 tag_name,
8689 attributes,
8690 ..
8691 } if *namespace == Namespace::MATHML && tag_name == "annotation-xml" => {
8692 for attribute in &*attributes.borrow() {
8693 if &*attribute.name == "encoding"
8694 && (attribute.value.is_some()
8695 && matches!(
8696 &*attribute.value.as_ref().unwrap().to_ascii_lowercase(),
8697 "text/html" | "application/xhtml+xml"
8698 ))
8699 {
8700 return true;
8701 }
8702 }
8703
8704 return false;
8705 }
8706 Data::Element {
8707 namespace,
8708 tag_name,
8709 ..
8710 } if *namespace == Namespace::SVG
8711 && matches!(&**tag_name, "foreignObject" | "desc" | "title") =>
8712 {
8713 return true;
8714 }
8715 _ => {
8716 return false;
8717 }
8718 }
8719 }
8720
8721 false
8722}