swc_html_codegen/
lib.rs

1#![deny(clippy::all)]
2#![allow(clippy::needless_update)]
3#![allow(clippy::match_like_matches_macro)]
4#![allow(non_local_definitions)]
5
6pub use std::fmt::Result;
7use std::{borrow::Cow, iter::Peekable, str::Chars};
8
9use swc_atoms::Atom;
10use swc_common::Spanned;
11use swc_html_ast::*;
12use swc_html_codegen_macros::emitter;
13use swc_html_utils::HTML_ENTITIES;
14use writer::HtmlWriter;
15
16pub use self::emit::*;
17use self::{ctx::Ctx, list::ListFormat};
18
19#[macro_use]
20mod macros;
21mod ctx;
22mod emit;
23mod list;
24pub mod writer;
25
/// Options that control how [`CodeGenerator`] serializes an HTML AST.
#[derive(Debug, Clone, Default)]
pub struct CodegenConfig<'a> {
    /// Emit compact output (for example, lower-case `doctype` keywords). Also
    /// the basis for the defaults of the optional flags below.
    pub minify: bool,
    // NOTE(review): not read in the visible part of this file; confirm where it
    // is consumed before documenting its effect.
    pub scripting_enabled: bool,
    /// Should be used only for `DocumentFragment` code generation
    pub context_element: Option<&'a Element>,
    /// Don't print optional tags (only when `minify` enabled)
    /// By default `true` when `minify` enabled, otherwise `false`
    pub tag_omission: Option<bool>,
    /// Keep the `<head>` and `<body>` start and end tags even when
    /// `tag_omission` is enabled. By default `false`.
    pub keep_head_and_body: Option<bool>,
    /// Keep the trailing `/` on HTML void elements whose `is_self_closing`
    /// flag is set (for example `<br />`). Empty SVG and MathML elements are
    /// always written self-closing, regardless of this flag.
    /// Documented default: `false` when `minify` enabled, otherwise `true`
    pub self_closing_void_elements: Option<bool>,
    /// Always print quotes or remove them where possible (only when `minify`
    /// enabled) By default `false` when `minify` enabled, otherwise `true`
    pub quotes: Option<bool>,
}
45
/// The node whose children are being emitted with tag omission enabled.
///
/// Passed to `emit_list_for_tag_omission` so that it knows which kind of
/// container (document, fragment or element) owns the child list.
enum TagOmissionParent<'a> {
    /// Children of a whole document.
    Document(&'a Document),
    /// Children of a document fragment.
    DocumentFragment(&'a DocumentFragment),
    /// Children of a single element.
    Element(&'a Element),
}
51
/// Serializes an HTML AST into text through an [`HtmlWriter`].
#[derive(Debug)]
pub struct CodeGenerator<'a, W>
where
    W: HtmlWriter,
{
    /// Sink that receives the generated output.
    wr: W,
    /// The configuration this generator was created with.
    config: CodegenConfig<'a>,
    /// Current emission context; swapped temporarily via `with_ctx`.
    ctx: Ctx,
    // For legacy `<plaintext>`
    // Once set, no further elements or end tags are written.
    is_plaintext: bool,
    /// Resolved flag: omit optional start/end tags where the rules allow it.
    tag_omission: bool,
    /// Resolved flag: never omit `<head>`/`<body>` start or end tags.
    keep_head_and_body: bool,
    /// Resolved flag: keep ` /` on HTML void elements marked `is_self_closing`.
    self_closing_void_elements: bool,
    /// Resolved flag taken from `CodegenConfig::quotes`.
    quotes: bool,
}
67
68impl<'a, W> CodeGenerator<'a, W>
69where
70    W: HtmlWriter,
71{
72    pub fn new(wr: W, config: CodegenConfig<'a>) -> Self {
73        let tag_omission = config.tag_omission.unwrap_or(config.minify);
74        let keep_head_and_body = config.keep_head_and_body.unwrap_or(false);
75        let self_closing_void_elements = config.tag_omission.unwrap_or(!config.minify);
76        let quotes = config.quotes.unwrap_or(!config.minify);
77
78        CodeGenerator {
79            wr,
80            config,
81            ctx: Default::default(),
82            is_plaintext: false,
83            tag_omission,
84            keep_head_and_body,
85            self_closing_void_elements,
86            quotes,
87        }
88    }
89
90    #[emitter]
91    fn emit_document(&mut self, n: &Document) -> Result {
92        if self.tag_omission {
93            self.emit_list_for_tag_omission(TagOmissionParent::Document(n))?;
94        } else {
95            self.emit_list(&n.children, ListFormat::NotDelimited)?;
96        }
97    }
98
99    #[emitter]
100    fn emit_document_fragment(&mut self, n: &DocumentFragment) -> Result {
101        let ctx = if let Some(context_element) = &self.config.context_element {
102            self.create_context_for_element(context_element)
103        } else {
104            Default::default()
105        };
106
107        if self.tag_omission {
108            self.with_ctx(ctx)
109                .emit_list_for_tag_omission(TagOmissionParent::DocumentFragment(n))?;
110        } else {
111            self.with_ctx(ctx)
112                .emit_list(&n.children, ListFormat::NotDelimited)?;
113        }
114    }
115
116    #[emitter]
117    fn emit_child(&mut self, n: &Child) -> Result {
118        match n {
119            Child::DocumentType(n) => emit!(self, n),
120            Child::Element(n) => emit!(self, n),
121            Child::Text(n) => emit!(self, n),
122            Child::Comment(n) => emit!(self, n),
123        }
124    }
125
126    #[emitter]
127    fn emit_document_doctype(&mut self, n: &DocumentType) -> Result {
128        let mut doctype = String::with_capacity(
129            10 + if let Some(name) = &n.name {
130                name.len() + 1
131            } else {
132                0
133            } + if let Some(public_id) = &n.public_id {
134                let mut len = public_id.len() + 10;
135
136                if let Some(system_id) = &n.system_id {
137                    len += system_id.len() + 3
138                }
139
140                len
141            } else if let Some(system_id) = &n.system_id {
142                system_id.len() + 10
143            } else {
144                0
145            },
146        );
147
148        doctype.push('<');
149        doctype.push('!');
150
151        if self.config.minify {
152            doctype.push_str("doctype");
153        } else {
154            doctype.push_str("DOCTYPE");
155        }
156
157        if let Some(name) = &n.name {
158            doctype.push(' ');
159            doctype.push_str(name);
160        }
161
162        if let Some(public_id) = &n.public_id {
163            doctype.push(' ');
164
165            if self.config.minify {
166                doctype.push_str("public");
167            } else {
168                doctype.push_str("PUBLIC");
169            }
170
171            doctype.push(' ');
172
173            let public_id_quote = if public_id.contains('"') { '\'' } else { '"' };
174
175            doctype.push(public_id_quote);
176            doctype.push_str(public_id);
177            doctype.push(public_id_quote);
178
179            if let Some(system_id) = &n.system_id {
180                doctype.push(' ');
181
182                let system_id_quote = if system_id.contains('"') { '\'' } else { '"' };
183
184                doctype.push(system_id_quote);
185                doctype.push_str(system_id);
186                doctype.push(system_id_quote);
187            }
188        } else if let Some(system_id) = &n.system_id {
189            doctype.push(' ');
190
191            if self.config.minify {
192                doctype.push_str("system");
193            } else {
194                doctype.push_str("SYSTEM");
195            }
196
197            doctype.push(' ');
198
199            let system_id_quote = if system_id.contains('"') { '\'' } else { '"' };
200
201            doctype.push(system_id_quote);
202            doctype.push_str(system_id);
203            doctype.push(system_id_quote);
204        }
205
206        doctype.push('>');
207
208        write_multiline_raw!(self, n.span, &doctype);
209        formatting_newline!(self);
210    }
211
212    fn basic_emit_element(
213        &mut self,
214        n: &Element,
215        parent: Option<&Element>,
216        prev: Option<&Child>,
217        next: Option<&Child>,
218    ) -> Result {
219        if self.is_plaintext {
220            return Ok(());
221        }
222
223        let has_attributes = !n.attributes.is_empty();
224        let can_omit_start_tag = self.tag_omission
225            && !has_attributes
226            && n.namespace == Namespace::HTML
227            && match &*n.tag_name {
228                // Tag omission in text/html:
229                // An html element's start tag can be omitted if the first thing inside the html
230                // element is not a comment.
231                "html" if !matches!(n.children.first(), Some(Child::Comment(..))) => true,
232                // A head element's start tag can be omitted if the element is empty, or if the
233                // first thing inside the head element is an element.
234                "head"
235                    if !self.keep_head_and_body
236                        && (n.children.is_empty()
237                            || matches!(n.children.first(), Some(Child::Element(..)))) =>
238                {
239                    true
240                }
241                // A body element's start tag can be omitted if the element is empty, or if the
242                // first thing inside the body element is not ASCII whitespace or a comment, except
243                // if the first thing inside the body element would be parsed differently outside.
244                "body"
245                    if !self.keep_head_and_body
246                        && (n.children.is_empty()
247                            || (match n.children.first() {
248                                Some(Child::Text(text))
249                                    if !text.data.is_empty()
250                                        && text
251                                            .data
252                                            .chars()
253                                            .next()
254                                            .unwrap()
255                                            .is_ascii_whitespace() =>
256                                {
257                                    false
258                                }
259                                Some(Child::Comment(..)) => false,
260                                Some(Child::Element(Element {
261                                    namespace,
262                                    tag_name,
263                                    ..
264                                })) if *namespace == Namespace::HTML
265                                    && matches!(
266                                        &**tag_name,
267                                        "base"
268                                            | "basefont"
269                                            | "bgsound"
270                                            | "frameset"
271                                            | "link"
272                                            | "meta"
273                                            | "noframes"
274                                            | "noscript"
275                                            | "script"
276                                            | "style"
277                                            | "template"
278                                            | "title"
279                                    ) =>
280                                {
281                                    false
282                                }
283                                _ => true,
284                            })) =>
285                {
286                    true
287                }
288                // A colgroup element's start tag can be omitted if the first thing inside the
289                // colgroup element is a col element, and if the element is not immediately preceded
290                // by another colgroup element whose end tag has been omitted. (It can't be omitted
291                // if the element is empty.)
292                "colgroup"
293                    if match n.children.first() {
294                        Some(Child::Element(element))
295                            if element.namespace == Namespace::HTML
296                                && element.tag_name == "col" =>
297                        {
298                            !matches!(prev, Some(Child::Element(element)) if element.namespace == Namespace::HTML
299                                        && element.tag_name == "colgroup")
300                        }
301                        _ => false,
302                    } =>
303                {
304                    true
305                }
306                // A tbody element's start tag can be omitted if the first thing inside the tbody
307                // element is a tr element, and if the element is not immediately preceded by a
308                // tbody, thead, or tfoot element whose end tag has been omitted. (It can't be
309                // omitted if the element is empty.)
310                "tbody"
311                    if match n.children.first() {
312                        Some(Child::Element(element))
313                            if element.namespace == Namespace::HTML && element.tag_name == "tr" =>
314                        {
315                            !matches!(prev, Some(Child::Element(element)) if element.namespace == Namespace::HTML
316                            && matches!(
317                                &*element.tag_name,
318                                "tbody" | "thead" | "tfoot"
319                            ))
320                        }
321                        _ => false,
322                    } =>
323                {
324                    true
325                }
326                _ => false,
327            };
328
329        let is_void_element = match n.namespace {
330            Namespace::HTML => matches!(
331                &*n.tag_name,
332                "area"
333                    | "base"
334                    | "basefont"
335                    | "bgsound"
336                    | "br"
337                    | "col"
338                    | "embed"
339                    | "frame"
340                    | "hr"
341                    | "img"
342                    | "input"
343                    | "keygen"
344                    | "link"
345                    | "meta"
346                    | "param"
347                    | "source"
348                    | "track"
349                    | "wbr"
350            ),
351            Namespace::SVG => n.children.is_empty(),
352            Namespace::MATHML => n.children.is_empty(),
353            _ => false,
354        };
355
356        if !can_omit_start_tag {
357            write_raw!(self, "<");
358            write_raw!(self, &n.tag_name);
359
360            if has_attributes {
361                space!(self);
362
363                self.emit_list(&n.attributes, ListFormat::SpaceDelimited)?;
364            }
365
366            if (matches!(n.namespace, Namespace::SVG | Namespace::MATHML) && is_void_element)
367                || (self.self_closing_void_elements
368                    && n.is_self_closing
369                    && is_void_element
370                    && matches!(n.namespace, Namespace::HTML))
371            {
372                if self.config.minify {
373                    let need_space = match n.attributes.last() {
374                        Some(Attribute {
375                            value: Some(value), ..
376                        }) => !value.chars().any(|c| match c {
377                            c if c.is_ascii_whitespace() => true,
378                            '`' | '=' | '<' | '>' | '"' | '\'' => true,
379                            _ => false,
380                        }),
381                        _ => false,
382                    };
383
384                    if need_space {
385                        write_raw!(self, " ");
386                    }
387                } else {
388                    write_raw!(self, " ");
389                }
390
391                write_raw!(self, "/");
392            }
393
394            write_raw!(self, ">");
395
396            if !self.config.minify && n.namespace == Namespace::HTML && n.tag_name == "html" {
397                newline!(self);
398            }
399        }
400
401        if is_void_element {
402            return Ok(());
403        }
404
405        if !self.is_plaintext {
406            self.is_plaintext = matches!(&*n.tag_name, "plaintext");
407        }
408
409        if let Some(content) = &n.content {
410            emit!(self, content);
411        } else if !n.children.is_empty() {
412            let ctx = self.create_context_for_element(n);
413
414            let need_extra_newline =
415                n.namespace == Namespace::HTML && matches!(&*n.tag_name, "textarea" | "pre");
416
417            if need_extra_newline {
418                if let Some(Child::Text(Text { data, .. })) = &n.children.first() {
419                    if data.contains('\n') {
420                        newline!(self);
421                    } else {
422                        formatting_newline!(self);
423                    }
424                }
425            }
426
427            if self.tag_omission {
428                self.with_ctx(ctx)
429                    .emit_list_for_tag_omission(TagOmissionParent::Element(n))?;
430            } else {
431                self.with_ctx(ctx)
432                    .emit_list(&n.children, ListFormat::NotDelimited)?;
433            }
434        }
435
436        let can_omit_end_tag = self.is_plaintext
437            || (self.tag_omission
438                && n.namespace == Namespace::HTML
439                && match &*n.tag_name {
440                    // Tag omission in text/html:
441
442                    // An html element's end tag can be omitted if the html element is not
443                    // immediately followed by a comment.
444                    //
445                    // A body element's end tag can be omitted if the body element is not
446                    // immediately followed by a comment.
447                    "html" => !matches!(next, Some(Child::Comment(..))),
448                    "body" if !self.keep_head_and_body => !matches!(next, Some(Child::Comment(..))),
449                    // A head element's end tag can be omitted if the head element is not
450                    // immediately followed by ASCII whitespace or a comment.
451                    "head" if !self.keep_head_and_body => match next {
452                        Some(Child::Text(text))
453                            if text.data.chars().next().unwrap().is_ascii_whitespace() =>
454                        {
455                            false
456                        }
457                        Some(Child::Comment(..)) => false,
458                        _ => true,
459                    },
460                    // A p element's end tag can be omitted if the p element is immediately followed
461                    // by an address, article, aside, blockquote, details, div, dl, fieldset,
462                    // figcaption, figure, footer, form, h1, h2, h3, h4, h5, h6, header, hgroup, hr,
463                    // main, menu, nav, ol, p, pre, section, table, or ul element, or if there is no
464                    // more content in the parent element and the parent element is an HTML element
465                    // that is not an a, audio, del, ins, map, noscript, or video element, or an
466                    // autonomous custom element.
467                    "p" => match next {
468                        Some(Child::Element(Element {
469                            namespace,
470                            tag_name,
471                            ..
472                        })) if *namespace == Namespace::HTML
473                            && matches!(
474                                &**tag_name,
475                                "address"
476                                    | "article"
477                                    | "aside"
478                                    | "blockquote"
479                                    | "details"
480                                    | "div"
481                                    | "dl"
482                                    | "fieldset"
483                                    | "figcaption"
484                                    | "figure"
485                                    | "footer"
486                                    | "form"
487                                    | "h1"
488                                    | "h2"
489                                    | "h3"
490                                    | "h4"
491                                    | "h5"
492                                    | "h6"
493                                    | "header"
494                                    | "hgroup"
495                                    | "hr"
496                                    | "main"
497                                    | "menu"
498                                    | "nav"
499                                    | "ol"
500                                    | "p"
501                                    | "pre"
502                                    | "section"
503                                    | "table"
504                                    | "ul"
505                            ) =>
506                        {
507                            true
508                        }
509                        None if match parent {
510                            Some(Element {
511                                namespace,
512                                tag_name,
513                                ..
514                            }) if is_html_tag_name(*namespace, tag_name)
515                                && !matches!(
516                                    &**tag_name,
517                                    "a" | "audio"
518                                        | "acronym"
519                                        | "big"
520                                        | "del"
521                                        | "font"
522                                        | "ins"
523                                        | "tt"
524                                        | "strike"
525                                        | "map"
526                                        | "noscript"
527                                        | "video"
528                                        | "kbd"
529                                        | "rbc"
530                                ) =>
531                            {
532                                true
533                            }
534                            _ => false,
535                        } =>
536                        {
537                            true
538                        }
539                        _ => false,
540                    },
541                    // An li element's end tag can be omitted if the li element is immediately
542                    // followed by another li element or if there is no more content in the parent
543                    // element.
544                    "li" if match parent {
545                        Some(Element {
546                            namespace,
547                            tag_name,
548                            ..
549                        }) if *namespace == Namespace::HTML
550                            && matches!(&**tag_name, "ul" | "ol" | "menu") =>
551                        {
552                            true
553                        }
554                        _ => false,
555                    } =>
556                    {
557                        match next {
558                            Some(Child::Element(Element {
559                                namespace,
560                                tag_name,
561                                ..
562                            })) if *namespace == Namespace::HTML && *tag_name == "li" => true,
563                            None => true,
564                            _ => false,
565                        }
566                    }
567                    // A dt element's end tag can be omitted if the dt element is immediately
568                    // followed by another dt element or a dd element.
569                    "dt" => match next {
570                        Some(Child::Element(Element {
571                            namespace,
572                            tag_name,
573                            ..
574                        })) if *namespace == Namespace::HTML
575                            && (*tag_name == "dt" || *tag_name == "dd") =>
576                        {
577                            true
578                        }
579                        _ => false,
580                    },
581                    // A dd element's end tag can be omitted if the dd element is immediately
582                    // followed by another dd element or a dt element, or if there is no more
583                    // content in the parent element.
584                    "dd" => match next {
585                        Some(Child::Element(Element {
586                            namespace,
587                            tag_name,
588                            ..
589                        })) if *namespace == Namespace::HTML
590                            && (*tag_name == "dd" || *tag_name == "dt") =>
591                        {
592                            true
593                        }
594                        None => true,
595                        _ => false,
596                    },
597                    // An rt element's end tag can be omitted if the rt element is immediately
598                    // followed by an rt or rp element, or if there is no more content in the parent
599                    // element.
600                    //
601                    // An rp element's end tag can be omitted if the rp element is immediately
602                    // followed by an rt or rp element, or if there is no more content in the parent
603                    // element.
604                    "rt" | "rp" => match next {
605                        Some(Child::Element(Element {
606                            namespace,
607                            tag_name,
608                            ..
609                        })) if *namespace == Namespace::HTML
610                            && (*tag_name == "rt" || *tag_name == "rp") =>
611                        {
612                            true
613                        }
614                        None => true,
615                        _ => false,
616                    },
617                    // The end tag can be omitted if the element is immediately followed by an <rt>,
618                    // <rtc>, or <rp> element or another <rb> element, or if there is no more
619                    // content in the parent element.
620                    "rb" => match next {
621                        Some(Child::Element(Element {
622                            namespace,
623                            tag_name,
624                            ..
625                        })) if *namespace == Namespace::HTML
626                            && (*tag_name == "rt"
627                                || *tag_name == "rtc"
628                                || *tag_name == "rp"
629                                || *tag_name == "rb") =>
630                        {
631                            true
632                        }
633                        None => true,
634                        _ => false,
635                    },
636                    // 	The closing tag can be omitted if it is immediately followed by a <rb>,
637                    // <rtc> or <rt> element opening tag or by its parent
638                    // closing tag.
639                    "rtc" => match next {
640                        Some(Child::Element(Element {
641                            namespace,
642                            tag_name,
643                            ..
644                        })) if *namespace == Namespace::HTML
645                            && (*tag_name == "rb" || *tag_name == "rtc" || *tag_name == "rt") =>
646                        {
647                            true
648                        }
649                        None => true,
650                        _ => false,
651                    },
652                    // An optgroup element's end tag can be omitted if the optgroup element is
653                    // immediately followed by another optgroup element, or if there is no more
654                    // content in the parent element.
655                    "optgroup" => match next {
656                        Some(Child::Element(Element {
657                            namespace,
658                            tag_name,
659                            ..
660                        })) if *namespace == Namespace::HTML && *tag_name == "optgroup" => true,
661                        None => true,
662                        _ => false,
663                    },
664                    // An option element's end tag can be omitted if the option element is
665                    // immediately followed by another option element, or if it is immediately
666                    // followed by an optgroup element, or if there is no more content in the parent
667                    // element.
668                    "option" => match next {
669                        Some(Child::Element(Element {
670                            namespace,
671                            tag_name,
672                            ..
673                        })) if *namespace == Namespace::HTML
674                            && (*tag_name == "option" || *tag_name == "optgroup") =>
675                        {
676                            true
677                        }
678                        None => true,
679                        _ => false,
680                    },
681                    // A caption element's end tag can be omitted if the caption element is not
682                    // immediately followed by ASCII whitespace or a comment.
683                    //
684                    // A colgroup element's end tag can be omitted if the colgroup element is not
685                    // immediately followed by ASCII whitespace or a comment.
686                    "caption" | "colgroup" => match next {
687                        Some(Child::Text(text))
688                            if text.data.chars().next().unwrap().is_ascii_whitespace() =>
689                        {
690                            false
691                        }
692                        Some(Child::Comment(..)) => false,
693                        _ => true,
694                    },
695                    // A tbody element's end tag can be omitted if the tbody element is immediately
696                    // followed by a tbody or tfoot element, or if there is no more content in the
697                    // parent element.
698                    "tbody" => match next {
699                        Some(Child::Element(Element {
700                            namespace,
701                            tag_name,
702                            ..
703                        })) if *namespace == Namespace::HTML
704                            && (*tag_name == "tbody" || *tag_name == "tfoot") =>
705                        {
706                            true
707                        }
708                        None => true,
709                        _ => false,
710                    },
711                    // A thead element's end tag can be omitted if the thead element is immediately
712                    // followed by a tbody or tfoot element.
713                    "thead" => match next {
714                        Some(Child::Element(Element {
715                            namespace,
716                            tag_name,
717                            ..
718                        })) if *namespace == Namespace::HTML
719                            && (*tag_name == "tbody" || *tag_name == "tfoot") =>
720                        {
721                            true
722                        }
723                        _ => false,
724                    },
725                    // A tfoot element's end tag can be omitted if there is no more content in the
726                    // parent element.
727                    "tfoot" => next.is_none(),
728                    // A tr element's end tag can be omitted if the tr element is immediately
729                    // followed by another tr element, or if there is no more content in the parent
730                    // element.
731                    "tr" => match next {
732                        Some(Child::Element(Element {
733                            namespace,
734                            tag_name,
735                            ..
736                        })) if *namespace == Namespace::HTML && *tag_name == "tr" => true,
737                        None => true,
738                        _ => false,
739                    },
740                    // A th element's end tag can be omitted if the th element is immediately
741                    // followed by a td or th element, or if there is no more content in the parent
742                    // element.
743                    "td" | "th" => match next {
744                        Some(Child::Element(Element {
745                            namespace,
746                            tag_name,
747                            ..
748                        })) if *namespace == Namespace::HTML
749                            && (*tag_name == "td" || *tag_name == "th") =>
750                        {
751                            true
752                        }
753                        None => true,
754                        _ => false,
755                    },
756                    _ => false,
757                });
758
759        if can_omit_end_tag {
760            return Ok(());
761        }
762
763        write_raw!(self, "<");
764        write_raw!(self, "/");
765        write_raw!(self, &n.tag_name);
766        write_raw!(self, ">");
767
768        Ok(())
769    }
770
771    #[emitter]
772    fn emit_element(&mut self, n: &Element) -> Result {
773        self.basic_emit_element(n, None, None, None)?;
774    }
775
776    #[emitter]
777    fn emit_attribute(&mut self, n: &Attribute) -> Result {
778        let mut attribute = String::with_capacity(
779            if let Some(prefix) = &n.prefix {
780                prefix.len() + 1
781            } else {
782                0
783            } + n.name.len()
784                + if let Some(value) = &n.value {
785                    value.len() + 1
786                } else {
787                    0
788                },
789        );
790
791        if let Some(prefix) = &n.prefix {
792            attribute.push_str(prefix);
793            attribute.push(':');
794        }
795
796        attribute.push_str(&n.name);
797
798        if let Some(value) = &n.value {
799            attribute.push('=');
800
801            if self.config.minify {
802                let (minifier, quote) = minify_attribute_value(value, self.quotes);
803
804                if let Some(quote) = quote {
805                    attribute.push(quote);
806                }
807
808                attribute.push_str(&minifier);
809
810                if let Some(quote) = quote {
811                    attribute.push(quote);
812                }
813            } else {
814                let normalized = escape_string(value, true);
815
816                attribute.push('"');
817                attribute.push_str(&normalized);
818                attribute.push('"');
819            }
820        }
821
822        write_multiline_raw!(self, n.span, &attribute);
823    }
824
825    #[emitter]
826    fn emit_text(&mut self, n: &Text) -> Result {
827        if self.ctx.need_escape_text {
828            if self.config.minify {
829                write_multiline_raw!(self, n.span, &minify_text(&n.data));
830            } else {
831                write_multiline_raw!(self, n.span, &escape_string(&n.data, false));
832            }
833        } else {
834            write_multiline_raw!(self, n.span, &n.data);
835        }
836    }
837
838    #[emitter]
839    fn emit_comment(&mut self, n: &Comment) -> Result {
840        let mut comment = String::with_capacity(n.data.len() + 7);
841
842        comment.push_str("<!--");
843        comment.push_str(&n.data);
844        comment.push_str("-->");
845
846        write_multiline_raw!(self, n.span, &comment);
847    }
848
849    fn create_context_for_element(&self, n: &Element) -> Ctx {
850        let need_escape_text = match &*n.tag_name {
851            "style" | "script" | "xmp" | "iframe" | "noembed" | "noframes" | "plaintext" => false,
852            "noscript" => !self.config.scripting_enabled,
853            _ if self.is_plaintext => false,
854            _ => true,
855        };
856
857        Ctx {
858            need_escape_text,
859            ..self.ctx
860        }
861    }
862
863    fn emit_list_for_tag_omission(&mut self, parent: TagOmissionParent) -> Result {
864        let nodes = match &parent {
865            TagOmissionParent::Document(document) => &document.children,
866            TagOmissionParent::DocumentFragment(document_fragment) => &document_fragment.children,
867            TagOmissionParent::Element(element) => &element.children,
868        };
869        let parent = match parent {
870            TagOmissionParent::Element(element) => Some(element),
871            _ => None,
872        };
873
874        for (idx, node) in nodes.iter().enumerate() {
875            match node {
876                Child::Element(element) => {
877                    let prev = if idx > 0 { nodes.get(idx - 1) } else { None };
878                    let next = nodes.get(idx + 1);
879
880                    self.basic_emit_element(element, parent, prev, next)?;
881                }
882                _ => {
883                    emit!(self, node)
884                }
885            }
886        }
887
888        Ok(())
889    }
890
891    fn emit_list<N>(&mut self, nodes: &[N], format: ListFormat) -> Result
892    where
893        Self: Emit<N>,
894        N: Spanned,
895    {
896        for (idx, node) in nodes.iter().enumerate() {
897            if idx != 0 {
898                self.write_delim(format)?;
899
900                if format & ListFormat::LinesMask == ListFormat::MultiLine {
901                    formatting_newline!(self);
902                }
903            }
904
905            emit!(self, node)
906        }
907
908        Ok(())
909    }
910
911    fn write_delim(&mut self, f: ListFormat) -> Result {
912        match f & ListFormat::DelimitersMask {
913            ListFormat::None => {}
914            ListFormat::SpaceDelimited => {
915                space!(self)
916            }
917            _ => unreachable!(),
918        }
919
920        Ok(())
921    }
922}
923
924#[allow(clippy::unused_peekable)]
925fn minify_attribute_value(value: &str, quotes: bool) -> (Cow<'_, str>, Option<char>) {
926    if value.is_empty() {
927        return (Cow::Borrowed(value), Some('"'));
928    }
929
930    // Fast-path
931    if !quotes
932        && value.chars().all(|c| match c {
933            '&' | '`' | '=' | '<' | '>' | '"' | '\'' => false,
934            c if c.is_ascii_whitespace() => false,
935            _ => true,
936        })
937    {
938        return (Cow::Borrowed(value), None);
939    }
940
941    let mut minified = String::with_capacity(value.len());
942
943    let mut unquoted = true;
944    let mut dq = 0;
945    let mut sq = 0;
946
947    let mut chars = value.chars().peekable();
948
949    while let Some(c) = chars.next() {
950        match c {
951            '&' => {
952                let next = chars.next();
953
954                if let Some(next) = next {
955                    if matches!(next, '#' | 'a'..='z' | 'A'..='Z') {
956                        minified.push_str(&minify_amp(next, &mut chars));
957                    } else {
958                        minified.push('&');
959                        minified.push(next);
960                    }
961                } else {
962                    minified.push('&');
963                }
964
965                continue;
966            }
967            c if c.is_ascii_whitespace() => {
968                unquoted = false;
969            }
970            '`' | '=' | '<' | '>' => {
971                unquoted = false;
972            }
973            '"' => {
974                unquoted = false;
975                dq += 1;
976            }
977            '\'' => {
978                unquoted = false;
979                sq += 1;
980            }
981
982            _ => {}
983        };
984
985        minified.push(c);
986    }
987
988    if !quotes && unquoted {
989        return (Cow::Owned(minified), None);
990    }
991
992    if dq > sq {
993        (Cow::Owned(minified.replace('\'', "&apos;")), Some('\''))
994    } else {
995        (Cow::Owned(minified.replace('"', "&quot;")), Some('"'))
996    }
997}
998
999#[allow(clippy::unused_peekable)]
1000fn minify_text(value: &str) -> Cow<'_, str> {
1001    // Fast-path
1002    if value.is_empty() {
1003        return Cow::Borrowed(value);
1004    }
1005
1006    // Fast-path
1007    if value.chars().all(|c| match c {
1008        '&' | '<' => false,
1009        _ => true,
1010    }) {
1011        return Cow::Borrowed(value);
1012    }
1013
1014    let mut result = String::with_capacity(value.len());
1015    let mut chars = value.chars().peekable();
1016
1017    while let Some(c) = chars.next() {
1018        match c {
1019            '&' => {
1020                let next = chars.next();
1021
1022                if let Some(next) = next {
1023                    if matches!(next, '#' | 'a'..='z' | 'A'..='Z') {
1024                        result.push_str(&minify_amp(next, &mut chars));
1025                    } else {
1026                        result.push('&');
1027                        result.push(next);
1028                    }
1029                } else {
1030                    result.push('&');
1031                }
1032            }
1033            '<' => {
1034                result.push_str("&lt;");
1035            }
1036            _ => result.push(c),
1037        }
1038    }
1039
1040    Cow::Owned(result)
1041}
1042
1043fn minify_amp(next: char, chars: &mut Peekable<Chars>) -> String {
1044    let mut result = String::with_capacity(7);
1045
1046    match next {
1047        hash @ '#' => {
1048            match chars.next() {
1049                // HTML CODE
1050                // Prevent `&amp;#38;` -> `&#38`
1051                Some(number @ '0'..='9') => {
1052                    result.push_str("&amp;");
1053                    result.push(hash);
1054                    result.push(number);
1055                }
1056                Some(x @ 'x' | x @ 'X') => {
1057                    match chars.peek() {
1058                        // HEX CODE
1059                        // Prevent `&amp;#x38;` -> `&#x38`
1060                        Some(c) if c.is_ascii_hexdigit() => {
1061                            result.push_str("&amp;");
1062                            result.push(hash);
1063                            result.push(x);
1064                        }
1065                        _ => {
1066                            result.push('&');
1067                            result.push(hash);
1068                            result.push(x);
1069                        }
1070                    }
1071                }
1072                any => {
1073                    result.push('&');
1074                    result.push(hash);
1075
1076                    if let Some(any) = any {
1077                        result.push(any);
1078                    }
1079                }
1080            }
1081        }
1082        // Named entity
1083        // Prevent `&amp;current` -> `&current`
1084        c @ 'a'..='z' | c @ 'A'..='Z' => {
1085            let mut entity_temporary_buffer = String::with_capacity(33);
1086
1087            entity_temporary_buffer.push('&');
1088            entity_temporary_buffer.push(c);
1089
1090            let mut found_entity = false;
1091
1092            // No need to validate input, because we reset position if nothing was found
1093            for c in chars {
1094                entity_temporary_buffer.push(c);
1095
1096                if HTML_ENTITIES.get(&entity_temporary_buffer).is_some() {
1097                    found_entity = true;
1098
1099                    break;
1100                } else {
1101                    // We stop when:
1102                    //
1103                    // - not ascii alphanumeric
1104                    // - we consume more characters than the longest entity
1105                    if !c.is_ascii_alphanumeric() || entity_temporary_buffer.len() > 32 {
1106                        break;
1107                    }
1108                }
1109            }
1110
1111            if found_entity {
1112                result.push_str("&amp;");
1113                result.push_str(&entity_temporary_buffer[1..]);
1114            } else {
1115                result.push('&');
1116                result.push_str(&entity_temporary_buffer[1..]);
1117            }
1118        }
1119        any => {
1120            result.push('&');
1121            result.push(any);
1122        }
1123    }
1124
1125    result
1126}
1127
/// Escapes a string for serialization, following these steps:
///
/// 1. Replace any occurrence of the "&" character by the string "&amp;".
///
/// 2. Replace any occurrences of the U+00A0 NO-BREAK SPACE character by the
///    string "&nbsp;".
///
/// 3. In attribute mode, replace any occurrences of the `"` character by the
///    string "&quot;".
///
/// 4. Outside attribute mode, replace any occurrences of the "<" character by
///    the string "&lt;", and any occurrences of the ">" character by the
///    string "&gt;".
///
/// The input is returned borrowed when none of these characters occur.
fn escape_string(value: &str, is_attribute_mode: bool) -> Cow<'_, str> {
    // Maps a character to its escaped form, or `None` when it is kept as-is.
    let replacement = |c: char| -> Option<&'static str> {
        match c {
            '&' => Some("&amp;"),
            '\u{00A0}' => Some("&nbsp;"),
            '"' if is_attribute_mode => Some("&quot;"),
            '<' if !is_attribute_mode => Some("&lt;"),
            '>' if !is_attribute_mode => Some("&gt;"),
            _ => None,
        }
    };

    // Fast-path: nothing to escape (also covers the empty string).
    if !value.chars().any(|c| replacement(c).is_some()) {
        return Cow::Borrowed(value);
    }

    let mut escaped = String::with_capacity(value.len());

    for c in value.chars() {
        match replacement(c) {
            Some(entity) => escaped.push_str(entity),
            None => escaped.push(c),
        }
    }

    Cow::Owned(escaped)
}
1179
1180fn is_html_tag_name(namespace: Namespace, tag_name: &Atom) -> bool {
1181    if namespace != Namespace::HTML {
1182        return false;
1183    }
1184
1185    matches!(
1186        &**tag_name,
1187        "a" | "abbr"
1188            | "acronym"
1189            | "address"
1190            | "applet"
1191            | "area"
1192            | "article"
1193            | "aside"
1194            | "audio"
1195            | "b"
1196            | "base"
1197            | "basefont"
1198            | "bdi"
1199            | "bdo"
1200            | "big"
1201            | "blockquote"
1202            | "body"
1203            | "br"
1204            | "button"
1205            | "canvas"
1206            | "caption"
1207            | "center"
1208            | "cite"
1209            | "code"
1210            | "col"
1211            | "colgroup"
1212            | "data"
1213            | "datalist"
1214            | "dd"
1215            | "del"
1216            | "details"
1217            | "dfn"
1218            | "dialog"
1219            | "dir"
1220            | "div"
1221            | "dl"
1222            | "dt"
1223            | "em"
1224            | "embed"
1225            | "fieldset"
1226            | "figcaption"
1227            | "figure"
1228            | "font"
1229            | "footer"
1230            | "form"
1231            | "frame"
1232            | "frameset"
1233            | "h1"
1234            | "h2"
1235            | "h3"
1236            | "h4"
1237            | "h5"
1238            | "h6"
1239            | "head"
1240            | "header"
1241            | "hgroup"
1242            | "hr"
1243            | "html"
1244            | "i"
1245            | "iframe"
1246            | "image"
1247            | "img"
1248            | "input"
1249            | "ins"
1250            | "isindex"
1251            | "kbd"
1252            | "keygen"
1253            | "label"
1254            | "legend"
1255            | "li"
1256            | "link"
1257            | "listing"
1258            | "main"
1259            | "map"
1260            | "mark"
1261            | "marquee"
1262            | "menu"
1263            // Removed from spec, but we keep here to track it
1264            // | "menuitem"
1265            | "meta"
1266            | "meter"
1267            | "nav"
1268            | "nobr"
1269            | "noembed"
1270            | "noframes"
1271            | "noscript"
1272            | "object"
1273            | "ol"
1274            | "optgroup"
1275            | "option"
1276            | "output"
1277            | "p"
1278            | "param"
1279            | "picture"
1280            | "plaintext"
1281            | "pre"
1282            | "progress"
1283            | "q"
1284            | "rb"
1285            | "rbc"
1286            | "rp"
1287            | "rt"
1288            | "rtc"
1289            | "ruby"
1290            | "s"
1291            | "samp"
1292            | "script"
1293            | "section"
1294            | "select"
1295            | "small"
1296            | "source"
1297            | "span"
1298            | "strike"
1299            | "strong"
1300            | "style"
1301            | "sub"
1302            | "summary"
1303            | "sup"
1304            | "table"
1305            | "tbody"
1306            | "td"
1307            | "template"
1308            | "textarea"
1309            | "tfoot"
1310            | "th"
1311            | "thead"
1312            | "time"
1313            | "title"
1314            | "tr"
1315            | "track"
1316            | "tt"
1317            | "u"
1318            | "ul"
1319            | "var"
1320            | "video"
1321            | "wbr"
1322            | "xmp"
1323    )
1324}