swc_ecma_codegen/
lit.rs

1use std::{fmt::Write, io, str};
2
3use ascii::AsciiChar;
4use compact_str::CompactString;
5use swc_atoms::wtf8::{CodePoint, Wtf8};
6use swc_common::{Spanned, DUMMY_SP};
7use swc_ecma_ast::*;
8use swc_ecma_codegen_macros::node_impl;
9
10#[cfg(swc_ast_unknown)]
11use crate::unknown_error;
12use crate::{text_writer::WriteJs, CowStr, Emitter, SourceMapperExt};
13
14#[node_impl]
15impl MacroNode for Lit {
16    fn emit(&mut self, emitter: &mut Macro) -> Result {
17        emitter.emit_leading_comments_of_span(self.span(), false)?;
18
19        srcmap!(emitter, self, true);
20
21        match self {
22            Lit::Bool(Bool { value, .. }) => {
23                if *value {
24                    keyword!(emitter, "true")
25                } else {
26                    keyword!(emitter, "false")
27                }
28            }
29            Lit::Null(Null { .. }) => keyword!(emitter, "null"),
30            Lit::Str(ref s) => emit!(s),
31            Lit::BigInt(ref s) => emit!(s),
32            Lit::Num(ref n) => emit!(n),
33            Lit::Regex(ref n) => {
34                punct!(emitter, "/");
35                // Encode non-ASCII characters in regex pattern when ascii_only is enabled
36                let encoded_exp = encode_regex_for_ascii(&n.exp, emitter.cfg.ascii_only);
37                emitter.wr.write_str(&encoded_exp)?;
38                punct!(emitter, "/");
39                emitter.wr.write_str(&n.flags)?;
40            }
41            Lit::JSXText(ref n) => emit!(n),
42            #[cfg(swc_ast_unknown)]
43            _ => return Err(unknown_error()),
44        }
45
46        Ok(())
47    }
48}
49
50#[node_impl]
51impl MacroNode for Str {
52    fn emit(&mut self, emitter: &mut Macro) -> Result {
53        emitter.wr.commit_pending_semi()?;
54
55        emitter.emit_leading_comments_of_span(self.span(), false)?;
56
57        srcmap!(emitter, self, true);
58
59        if &*self.value == "use strict"
60            && self.raw.is_some()
61            && self.raw.as_ref().unwrap().contains('\\')
62            && (!emitter.cfg.inline_script || !self.raw.as_ref().unwrap().contains("script"))
63        {
64            emitter
65                .wr
66                .write_str_lit(DUMMY_SP, self.raw.as_ref().unwrap())?;
67
68            srcmap!(emitter, self, false);
69
70            return Ok(());
71        }
72
73        let target = emitter.cfg.target;
74
75        if !emitter.cfg.minify {
76            if let Some(raw) = &self.raw {
77                let es5_safe = match emitter.cfg.target {
78                    EsVersion::Es3 | EsVersion::Es5 => {
79                        // Block raw strings containing ES6+ Unicode escapes (\u{...}) for ES3/ES5
80                        // targets
81                        !raw.contains("\\u{")
82                    }
83                    _ => true,
84                };
85
86                if es5_safe
87                    && (!emitter.cfg.ascii_only || raw.is_ascii())
88                    && (!emitter.cfg.inline_script
89                        || !self.raw.as_ref().unwrap().contains("script"))
90                {
91                    emitter.wr.write_str_lit(DUMMY_SP, raw)?;
92                    return Ok(());
93                }
94            }
95        }
96
97        let (quote_char, mut value) = get_quoted_utf16(&self.value, emitter.cfg.ascii_only, target);
98
99        if emitter.cfg.inline_script {
100            value = CowStr::Owned(
101                replace_close_inline_script(&value)
102                    .replace("\x3c!--", "\\x3c!--")
103                    .replace("--\x3e", "--\\x3e")
104                    .into(),
105            );
106        }
107
108        let quote_str = [quote_char.as_byte()];
109        let quote_str = unsafe {
110            // Safety: quote_char is valid ascii
111            str::from_utf8_unchecked(&quote_str)
112        };
113
114        emitter.wr.write_str(quote_str)?;
115        emitter.wr.write_str_lit(DUMMY_SP, &value)?;
116        emitter.wr.write_str(quote_str)?;
117
118        // srcmap!(emitter,self, false);
119
120        Ok(())
121    }
122}
123
// Emits a numeric literal by delegating to `Emitter::emit_num_lit_internal`.
#[node_impl]
impl MacroNode for Number {
    fn emit(&mut self, emitter: &mut Macro) -> Result {
        // Dot detection is switched off (`false`) and the returned flag is discarded; only
        // an I/O error is propagated.
        emitter.emit_num_lit_internal(self, false)?;

        Ok(())
    }
}
132
133#[node_impl]
134impl MacroNode for BigInt {
135    fn emit(&mut self, emitter: &mut Macro) -> Result {
136        emitter.emit_leading_comments_of_span(self.span, false)?;
137
138        if emitter.cfg.minify {
139            let value = if *self.value >= 10000000000000000_i64.into() {
140                format!("0x{}", self.value.to_str_radix(16))
141            } else if *self.value <= (-10000000000000000_i64).into() {
142                format!("-0x{}", (-*self.value.clone()).to_str_radix(16))
143            } else {
144                self.value.to_string()
145            };
146            emitter.wr.write_lit(self.span, &value)?;
147            emitter.wr.write_lit(self.span, "n")?;
148        } else {
149            match &self.raw {
150                Some(raw) => {
151                    if raw.len() > 2 && emitter.cfg.target < EsVersion::Es2021 && raw.contains('_')
152                    {
153                        emitter.wr.write_str_lit(self.span, &raw.replace('_', ""))?;
154                    } else {
155                        emitter.wr.write_str_lit(self.span, raw)?;
156                    }
157                }
158                _ => {
159                    emitter.wr.write_lit(self.span, &self.value.to_string())?;
160                    emitter.wr.write_lit(self.span, "n")?;
161                }
162            }
163        }
164
165        Ok(())
166    }
167}
168
169#[node_impl]
170impl MacroNode for Bool {
171    fn emit(&mut self, emitter: &mut Macro) -> Result {
172        emitter.emit_leading_comments_of_span(self.span(), false)?;
173
174        if self.value {
175            keyword!(emitter, self.span, "true")
176        } else {
177            keyword!(emitter, self.span, "false")
178        }
179
180        Ok(())
181    }
182}
183
184pub fn replace_close_inline_script(raw: &str) -> CowStr {
185    let chars = raw.as_bytes();
186    let pattern_len = 8; // </script>
187
188    let mut matched_indexes = chars
189        .iter()
190        .enumerate()
191        .filter(|(index, byte)| {
192            byte == &&b'<'
193                && index + pattern_len < chars.len()
194                && chars[index + 1..index + pattern_len].eq_ignore_ascii_case(b"/script")
195                && matches!(
196                    chars[index + pattern_len],
197                    b'>' | b' ' | b'\t' | b'\n' | b'\x0C' | b'\r'
198                )
199        })
200        .map(|(index, _)| index)
201        .peekable();
202
203    if matched_indexes.peek().is_none() {
204        return CowStr::Borrowed(raw);
205    }
206
207    let mut result = CompactString::new(raw);
208
209    for (offset, i) in matched_indexes.enumerate() {
210        result.insert(i + 1 + offset, '\\');
211    }
212
213    CowStr::Owned(result)
214}
215
216impl<W, S: swc_common::SourceMapper> Emitter<'_, W, S>
217where
218    W: WriteJs,
219    S: SourceMapperExt,
220{
221    /// `1.toString` is an invalid property access,
222    /// should emit a dot after the literal if return true
223    pub fn emit_num_lit_internal(
224        &mut self,
225        num: &Number,
226        mut detect_dot: bool,
227    ) -> std::result::Result<bool, io::Error> {
228        self.wr.commit_pending_semi()?;
229
230        self.emit_leading_comments_of_span(num.span(), false)?;
231
232        // Handle infinity
233        if num.value.is_infinite() && num.raw.is_none() {
234            self.wr.write_str_lit(num.span, &num.value.print())?;
235
236            return Ok(false);
237        }
238
239        let mut striped_raw = None;
240        let mut value = String::default();
241
242        srcmap!(self, num, true);
243
244        if self.cfg.minify {
245            if num.value.is_infinite() && num.raw.is_some() {
246                self.wr.write_str_lit(DUMMY_SP, num.raw.as_ref().unwrap())?;
247            } else {
248                value = minify_number(num.value, &mut detect_dot);
249                self.wr.write_str_lit(DUMMY_SP, &value)?;
250            }
251        } else {
252            match &num.raw {
253                Some(raw) => {
254                    if raw.len() > 2 && self.cfg.target < EsVersion::Es2015 && {
255                        let slice = &raw.as_bytes()[..2];
256                        slice == b"0b" || slice == b"0o" || slice == b"0B" || slice == b"0O"
257                    } {
258                        if num.value.is_infinite() && num.raw.is_some() {
259                            self.wr.write_str_lit(DUMMY_SP, num.raw.as_ref().unwrap())?;
260                        } else {
261                            value = num.value.print();
262                            self.wr.write_str_lit(DUMMY_SP, &value)?;
263                        }
264                    } else if raw.len() > 2
265                        && self.cfg.target < EsVersion::Es2021
266                        && raw.contains('_')
267                    {
268                        let value = raw.replace('_', "");
269                        self.wr.write_str_lit(DUMMY_SP, &value)?;
270
271                        striped_raw = Some(value);
272                    } else {
273                        self.wr.write_str_lit(DUMMY_SP, raw)?;
274
275                        if !detect_dot {
276                            return Ok(false);
277                        }
278
279                        striped_raw = Some(raw.replace('_', ""));
280                    }
281                }
282                _ => {
283                    value = num.value.print();
284                    self.wr.write_str_lit(DUMMY_SP, &value)?;
285                }
286            }
287        }
288
289        // fast return
290        if !detect_dot {
291            return Ok(false);
292        }
293
294        Ok(striped_raw
295            .map(|raw| {
296                if raw.bytes().all(|c| c.is_ascii_digit()) {
297                    // Maybe legacy octal
298                    // Do we really need to support pre es5?
299                    let slice = raw.as_bytes();
300                    if slice.len() >= 2 && slice[0] == b'0' {
301                        return false;
302                    }
303
304                    return true;
305                }
306
307                false
308            })
309            .unwrap_or_else(|| {
310                let bytes = value.as_bytes();
311
312                if !bytes.contains(&b'.') && !bytes.contains(&b'e') {
313                    return true;
314                }
315
316                false
317            }))
318    }
319}
320
321/// Encodes non-ASCII characters in regex patterns when ascii_only is enabled.
322///
323/// This function converts non-ASCII characters to their Unicode escape
324/// sequences to ensure the regex can be safely used in ASCII-only contexts.
325///
326/// # Arguments
327/// * `pattern` - The regex pattern string to encode
328/// * `ascii_only` - Whether to encode non-ASCII characters
329///
330/// # Returns
331/// A string with non-ASCII characters encoded as Unicode escapes
332pub fn encode_regex_for_ascii(pattern: &str, ascii_only: bool) -> CowStr {
333    if !ascii_only || pattern.is_ascii() {
334        return CowStr::Borrowed(pattern);
335    }
336
337    let mut buf = CompactString::with_capacity(pattern.len());
338
339    for c in pattern.chars() {
340        match c {
341            // ASCII characters are preserved as-is
342            '\x00'..='\x7e' => buf.push(c),
343            // Characters in the \x7f to \xff range use \xHH format
344            '\u{7f}'..='\u{ff}' => {
345                buf.push_str("\\x");
346                write!(&mut buf, "{:02x}", c as u8).unwrap();
347            }
348            // Line/paragraph separators need escaping in all contexts
349            '\u{2028}' => buf.push_str("\\u2028"),
350            '\u{2029}' => buf.push_str("\\u2029"),
351            // Characters above \xff use \uHHHH format
352            _ => {
353                if c > '\u{FFFF}' {
354                    // Characters beyond BMP are encoded as surrogate pairs for compatibility
355                    let code_point = c as u32;
356                    let h = ((code_point - 0x10000) / 0x400) + 0xd800;
357                    let l = ((code_point - 0x10000) % 0x400) + 0xdc00;
358                    write!(&mut buf, "\\u{h:04x}\\u{l:04x}").unwrap();
359                } else {
360                    write!(&mut buf, "\\u{:04x}", c as u16).unwrap();
361                }
362            }
363        }
364    }
365
366    CowStr::Owned(buf)
367}
368
// Builds a `CodePoint` from a `char` or integer expression by casting it to `u32`.
// It is used below both for `const` items and inside ordinary expressions.
macro_rules! cp {
    ($c:expr) => {
        // SAFETY (NOTE(review)): `from_u32_unchecked` presumably requires a value no larger
        // than U+10FFFF - confirm against its documentation. The call sites visible in
        // this file pass only `char` literals and the surrogate bounds 0xD800 / 0xDFFF.
        unsafe { CodePoint::from_u32_unchecked($c as u32) }
    };
}
374
// Code points that `get_quoted_utf16` treats specially.

// Quote characters; whichever is chosen as the delimiter gets backslash-escaped.
const DOUBLE_QUOTE: CodePoint = cp!('"');
const SINGLE_QUOTE: CodePoint = cp!('\'');
// Characters with a dedicated escape (`\0` / `\x00`, `\b`, `\f`, `\n`, `\r`, `\v`, `\\`);
// TAB is the exception and is written as a literal tab character.
const NULL_CHAR: CodePoint = cp!('\x00');
const BACKSPACE: CodePoint = cp!('\u{0008}');
const FORM_FEED: CodePoint = cp!('\u{000c}');
const LINE_FEED: CodePoint = cp!('\n');
const CARRIAGE_RETURN: CodePoint = cp!('\r');
const VERTICAL_TAB: CodePoint = cp!('\u{000b}');
const TAB: CodePoint = cp!('\t');
const BACKSLASH: CodePoint = cp!('\\');
// Remaining control characters, split by how many hex digits they need:
// U+0001..=U+000F is written as `\x0H`, U+0010..=U+001F as `\xHH`.
const CTRL_START_1: CodePoint = cp!('\x01');
const CTRL_END_1: CodePoint = cp!('\x0f');
const CTRL_START_2: CodePoint = cp!('\x10');
const CTRL_END_2: CodePoint = cp!('\x1f');
// Printable ASCII, copied verbatim.
const PRINTABLE_START: CodePoint = cp!('\x20');
const PRINTABLE_END: CodePoint = cp!('\x7e');
// U+007F (DEL) through U+00FF: written as `\xHH` when `ascii_only` is set or the target
// is ES5 or older, verbatim otherwise.
const DEL_START: CodePoint = cp!('\u{7f}');
const DEL_END: CodePoint = cp!('\u{ff}');
// Always written as `\u2028`, `\u2029` and `\uFEFF` respectively.
const LINE_SEPARATOR: CodePoint = cp!('\u{2028}');
const PARAGRAPH_SEPARATOR: CodePoint = cp!('\u{2029}');
const ZERO_WIDTH_NO_BREAK_SPACE: CodePoint = cp!('\u{FEFF}');

// Bounds of the UTF-16 surrogate range; an unpaired surrogate is written as `\uXXXX`.
const SURROGATE_START: CodePoint = cp!(0xd800);
const SURROGATE_END: CodePoint = cp!(0xdfff);
399
400/// Returns `(quote_char, value)`
401pub fn get_quoted_utf16(v: &Wtf8, ascii_only: bool, target: EsVersion) -> (AsciiChar, CowStr) {
402    // Fast path: If the string is ASCII and doesn't need escaping, we can avoid
403    // allocation
404    if v.is_ascii() {
405        let mut needs_escaping = false;
406        let mut single_quote_count = 0;
407        let mut double_quote_count = 0;
408
409        for &b in v.as_bytes() {
410            match b {
411                b'\'' => single_quote_count += 1,
412                b'"' => double_quote_count += 1,
413                // Control characters and backslash need escaping
414                0..=0x1f | b'\\' => {
415                    needs_escaping = true;
416                    break;
417                }
418                _ => {}
419            }
420        }
421
422        if !needs_escaping {
423            let quote_char = if double_quote_count > single_quote_count {
424                AsciiChar::Apostrophe
425            } else {
426                AsciiChar::Quotation
427            };
428
429            // If there are no quotes to escape, we can return the original string
430            if (quote_char == AsciiChar::Apostrophe && single_quote_count == 0)
431                || (quote_char == AsciiChar::Quotation && double_quote_count == 0)
432            {
433                return (
434                    quote_char,
435                    // SAFETY: We have checked that the string is ASCII. So it does not contain any
436                    // unpaired surrogate.
437                    CowStr::Borrowed(v.as_str().unwrap()),
438                );
439            }
440        }
441    }
442
443    // Slow path: Original implementation for strings that need processing
444    // Count quotes first to determine which quote character to use
445    let (mut single_quote_count, mut double_quote_count) = (0, 0);
446    for c in v.code_points() {
447        match c {
448            SINGLE_QUOTE => single_quote_count += 1,
449            DOUBLE_QUOTE => double_quote_count += 1,
450            _ => {}
451        }
452    }
453
454    // Pre-calculate capacity to avoid reallocations
455    let quote_char = if double_quote_count > single_quote_count {
456        AsciiChar::Apostrophe
457    } else {
458        AsciiChar::Quotation
459    };
460    let escape_char = if quote_char == AsciiChar::Apostrophe {
461        AsciiChar::Apostrophe
462    } else {
463        AsciiChar::Quotation
464    };
465    let escape_count = if quote_char == AsciiChar::Apostrophe {
466        single_quote_count
467    } else {
468        double_quote_count
469    };
470
471    // Add 1 for each escaped quote
472    let capacity = v.len() + escape_count;
473    let mut buf = CompactString::with_capacity(capacity);
474
475    let mut iter = v.code_points().peekable();
476    while let Some(c) = iter.next() {
477        match c {
478            NULL_CHAR => {
479                if target < EsVersion::Es5
480                    || matches!(iter.peek(), Some(x) if *x >= cp!('0') && *x <= cp!('9'))
481                {
482                    buf.push_str("\\x00");
483                } else {
484                    buf.push_str("\\0");
485                }
486            }
487            BACKSPACE => buf.push_str("\\b"),
488            FORM_FEED => buf.push_str("\\f"),
489            LINE_FEED => buf.push_str("\\n"),
490            CARRIAGE_RETURN => buf.push_str("\\r"),
491            VERTICAL_TAB => buf.push_str("\\v"),
492            TAB => buf.push('\t'),
493            BACKSLASH => buf.push_str("\\\\"),
494            c if matches!(c.to_char(), Some(c) if c == escape_char) => {
495                buf.push('\\');
496                // SAFETY: `escape_char` is a valid ASCII character.
497                buf.push(c.to_char().unwrap());
498            }
499            c if c >= CTRL_START_1 && c <= CTRL_END_1 => {
500                buf.push_str("\\x0");
501                write!(&mut buf, "{:x}", c.to_u32() as u8).unwrap();
502            }
503            c if c >= CTRL_START_2 && c <= CTRL_END_2 => {
504                buf.push_str("\\x");
505                write!(&mut buf, "{:x}", c.to_u32() as u8).unwrap();
506            }
507            c if c >= PRINTABLE_START && c <= PRINTABLE_END => {
508                // SAFETY: c is a valid ASCII character.
509                buf.push(c.to_char().unwrap())
510            }
511            c if c >= DEL_START && c <= DEL_END => {
512                if ascii_only || target <= EsVersion::Es5 {
513                    buf.push_str("\\x");
514                    write!(&mut buf, "{:x}", c.to_u32() as u8).unwrap();
515                } else {
516                    // SAFETY: c is a valid Rust char.
517                    buf.push(c.to_char().unwrap());
518                }
519            }
520            LINE_SEPARATOR => buf.push_str("\\u2028"),
521            PARAGRAPH_SEPARATOR => buf.push_str("\\u2029"),
522            ZERO_WIDTH_NO_BREAK_SPACE => buf.push_str("\\uFEFF"),
523            c => {
524                if c.is_ascii() {
525                    // SAFETY: c is a valid ASCII character.
526                    buf.push(c.to_char().unwrap());
527                } else if c > cp!('\u{FFFF}') {
528                    if target <= EsVersion::Es5 {
529                        let h = ((c.to_u32() - 0x10000) / 0x400) + 0xd800;
530                        let l = (c.to_u32() - 0x10000) % 0x400 + 0xdc00;
531                        write!(&mut buf, "\\u{h:04X}\\u{l:04X}").unwrap();
532                    } else if ascii_only {
533                        write!(&mut buf, "\\u{{{:04X}}}", c.to_u32()).unwrap();
534                    } else {
535                        // SAFETY: c is a valid Rust char. (> U+FFFF && <= U+10FFFF)
536                        // The latter condition is guaranteed by CodePoint.
537                        buf.push(c.to_char().unwrap());
538                    }
539                } else if c >= SURROGATE_START && c <= SURROGATE_END {
540                    // Unparied Surrogate
541                    // Escape as \uXXXX
542                    write!(&mut buf, "\\u{:04X}", c.to_u32()).unwrap();
543                } else if ascii_only {
544                    write!(&mut buf, "\\u{:04X}", c.to_u32() as u16).unwrap();
545                } else {
546                    // SAFETY: c is a valid Rust char. (>= U+0080 && <= U+FFFF, excluding
547                    // surrogates)
548                    buf.push(c.to_char().unwrap());
549                }
550            }
551        }
552    }
553
554    (quote_char, CowStr::Owned(buf))
555}
556
/// Renders `num` as a compact JavaScript numeric literal.
///
/// The forms tried, in order:
///
/// * non-finite values are spelled `NaN`, `Infinity` or `-Infinity`;
/// * integers of at least 10^7 that are not multiples of 1000 use hexadecimal
///   (`0xffffff`) when that is not longer than the decimal spelling;
/// * fractions below one drop the leading zero (`.5`) or, with more than two zeros after
///   the point, switch to exponent form (`1234e-7`);
/// * numbers ending in `000` use exponent form (`12345e3`);
/// * anything else is printed in plain decimal.
///
/// `detect_dot` is an in/out flag: the caller sets it when the literal is the object of a
/// property access, where a plain decimal integer needs an extra dot (`1..toString()`).
/// It is cleared whenever the returned text can be followed by `.prop` directly, and
/// otherwise left untouched. When it is set on entry, the extra dot counts against the
/// decimal form in the hexadecimal length comparison.
pub fn minify_number(num: f64, detect_dot: &mut bool) -> String {
    // Rust prints these as `inf` / `-inf`, which is not JavaScript.
    if !num.is_finite() {
        *detect_dot = false;

        let text = if num.is_nan() {
            "NaN"
        } else if num.is_sign_negative() {
            "-Infinity"
        } else {
            "Infinity"
        };
        return text.to_string();
    }

    if num.fract() == 0.0 && num.abs() <= u64::MAX as f64 {
        let int = num.abs() as u64;

        // Multiples of 1000 are left to the exponent form further down.
        if int >= 10_000_000 && int % 1000 != 0 {
            let hex = format!("{int:#x}");
            // The decimal form costs one more character when the caller must add a dot.
            let decimal_len = int.to_string().len() + usize::from(*detect_dot);

            if hex.len() <= decimal_len {
                *detect_dot = false;

                let sign = if num.is_sign_negative() { "-" } else { "" };
                return format!("{sign}{hex}");
            }
        }
    }

    let text = num.to_string();

    if text.contains('.') {
        *detect_dot = false;
    }

    let (sign, magnitude) = match text.strip_prefix('-') {
        Some(rest) => ("-", rest),
        None => ("", text.as_str()),
    };

    if let Some(fraction) = magnitude.strip_prefix("0.") {
        let zeros = clz(fraction);

        return if zeros > 2 {
            // 0.0001234 -> 1234e-7: the exponent is the number of fractional digits.
            format!("{sign}{}e-{}", &fraction[zeros..], fraction.len())
        } else {
            format!("{sign}.{fraction}")
        };
    }

    if text.ends_with("000") {
        *detect_dot = false;

        let kept = text.trim_end_matches('0');
        return format!("{kept}e{}", text.len() - kept.len());
    }

    text
}

/// Counts the leading ASCII `0` characters of `s`.
fn clz(s: &str) -> usize {
    s.bytes().take_while(|&byte| byte == b'0').count()
}
628
/// Conversion of a value into the text written to the output.
///
/// The only implementation in this file is for `f64`.
pub trait Print {
    /// Returns the textual form of `self` as an owned `String`.
    fn print(&self) -> String;
}
632
633impl Print for f64 {
634    fn print(&self) -> String {
635        // preserve -0.0
636        if *self == 0.0 {
637            return self.to_string();
638        }
639
640        let mut buffer = ryu_js::Buffer::new();
641        buffer.format(*self).to_string()
642    }
643}