swc_ecma_minifier/util/
base54.rs

1use std::{cmp::Reverse, io, ops::AddAssign};
2
3use arrayvec::ArrayVec;
4use swc_atoms::Atom;
5use swc_common::{
6    sync::Lrc, BytePos, FileLines, FileName, Loc, SourceMapper, Span, SpanLinesError,
7};
8use swc_ecma_ast::*;
9use swc_ecma_codegen::{text_writer::WriteJs, Emitter};
10
/// Table of occurrence counters, one `i32` slot for each of the 256 possible
/// byte values. The slot index is the byte value itself.
#[derive(Clone, Copy)]
pub(crate) struct CharFreq([i32; 256]);
14
/// Lookup table of the 64 bytes used as "digits" when a number is encoded
/// into an identifier name.
#[derive(Clone, Copy)]
pub(crate) struct Base54Chars {
    /// Byte to emit for each digit value; indexed by the digit.
    chars: [u8; 64],
}
19
20impl Default for CharFreq {
21    fn default() -> Self {
22        CharFreq([0; 256])
23    }
24}
25
/// Stateless stand-in source map; `CharFreq::compute` hands it to the
/// `Emitter` as its `cm` field.
struct DummySourceMap;
27
28impl SourceMapper for DummySourceMap {
29    fn lookup_char_pos(&self, _: BytePos) -> Loc {
30        unreachable!()
31    }
32
33    fn span_to_lines(&self, _: Span) -> Result<FileLines, Box<SpanLinesError>> {
34        unreachable!()
35    }
36
37    fn span_to_string(&self, _: Span) -> String {
38        String::new()
39    }
40
41    fn span_to_filename(&self, _: Span) -> Lrc<FileName> {
42        FileName::Anon.into()
43    }
44
45    fn merge_spans(&self, _: Span, _: Span) -> Option<Span> {
46        None
47    }
48
49    fn call_span_if_macro(&self, sp: Span) -> Span {
50        sp
51    }
52
53    fn doctest_offset_line(&self, line: usize) -> usize {
54        line
55    }
56
57    fn span_to_snippet(&self, _: Span) -> Result<String, Box<swc_common::SpanSnippetError>> {
58        Ok(String::new())
59    }
60}
61
62impl SourceMapperExt for DummySourceMap {
63    fn get_code_map(&self) -> &dyn SourceMapper {
64        self
65    }
66}
67
68impl CharFreq {
69    #[inline(always)]
70    fn write(&mut self, data: &str) -> io::Result<()> {
71        self.scan(data, 1);
72        Ok(())
73    }
74}
75
76impl WriteJs for CharFreq {
77    #[inline(always)]
78    fn increase_indent(&mut self) -> io::Result<()> {
79        Ok(())
80    }
81
82    #[inline(always)]
83    fn decrease_indent(&mut self) -> io::Result<()> {
84        Ok(())
85    }
86
87    #[inline(always)]
88    fn write_semi(&mut self, _: Option<Span>) -> io::Result<()> {
89        Ok(())
90    }
91
92    #[inline(always)]
93    fn write_space(&mut self) -> io::Result<()> {
94        Ok(())
95    }
96
97    #[inline(always)]
98    fn write_keyword(&mut self, _: Option<Span>, s: &'static str) -> io::Result<()> {
99        self.write(s)?;
100        Ok(())
101    }
102
103    #[inline(always)]
104    fn write_operator(&mut self, _: Option<Span>, s: &str) -> io::Result<()> {
105        self.write(s)?;
106        Ok(())
107    }
108
109    #[inline(always)]
110    fn write_param(&mut self, s: &str) -> io::Result<()> {
111        self.write(s)?;
112        Ok(())
113    }
114
115    #[inline(always)]
116    fn write_property(&mut self, s: &str) -> io::Result<()> {
117        self.write(s)?;
118        Ok(())
119    }
120
121    #[inline(always)]
122    fn write_line(&mut self) -> io::Result<()> {
123        Ok(())
124    }
125
126    #[inline(always)]
127    fn write_lit(&mut self, _: Span, s: &str) -> io::Result<()> {
128        self.write(s)?;
129
130        Ok(())
131    }
132
133    #[inline(always)]
134    fn write_comment(&mut self, s: &str) -> io::Result<()> {
135        self.write(s)?;
136
137        Ok(())
138    }
139
140    #[inline(always)]
141    fn write_str_lit(&mut self, _: Span, s: &str) -> io::Result<()> {
142        self.write(s)?;
143
144        Ok(())
145    }
146
147    #[inline(always)]
148    fn write_str(&mut self, s: &str) -> io::Result<()> {
149        self.write(s)?;
150        Ok(())
151    }
152
153    #[inline(always)]
154    fn write_symbol(&mut self, _: Span, s: &str) -> io::Result<()> {
155        self.write(s)?;
156        Ok(())
157    }
158
159    #[inline(always)]
160    fn write_punct(&mut self, _: Option<Span>, s: &'static str) -> io::Result<()> {
161        self.write(s)?;
162        Ok(())
163    }
164
165    #[inline(always)]
166    fn care_about_srcmap(&self) -> bool {
167        false
168    }
169
170    #[inline(always)]
171    fn add_srcmap(&mut self, _: BytePos) -> io::Result<()> {
172        Ok(())
173    }
174
175    #[inline(always)]
176    fn commit_pending_semi(&mut self) -> io::Result<()> {
177        Ok(())
178    }
179
180    #[inline(always)]
181    fn can_ignore_invalid_unicodes(&mut self) -> bool {
182        true
183    }
184}
185
186impl CharFreq {
187    pub fn scan(&mut self, s: &str, delta: i32) {
188        if delta == 0 {
189            return;
190        }
191
192        for &c in s.as_bytes() {
193            self.0[c as usize] += delta;
194        }
195    }
196
197    pub fn compute(p: &Program, idents: &Vec<Atom>) -> Self {
198        let mut a = {
199            let cm = Lrc::new(DummySourceMap);
200            let mut freq = Self::default();
201
202            {
203                let mut emitter = Emitter {
204                    cfg: swc_ecma_codegen::Config::default()
205                        .with_target(EsVersion::latest())
206                        .with_minify(true),
207                    cm,
208                    comments: None,
209                    wr: &mut freq,
210                };
211
212                emitter.emit_program(p).unwrap();
213            }
214
215            freq
216        };
217
218        let mut analyzer = CharFreqAnalyzer {
219            freq: Default::default(),
220        };
221
222        for ident in idents {
223            analyzer.freq.scan(ident, -1);
224        }
225
226        a += analyzer.freq;
227
228        a
229    }
230
231    pub fn compile(self) -> Base54Chars {
232        static BASE54_DEFAULT_CHARS: &[u8; 64] =
233            b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789$_";
234
235        let mut arr = BASE54_DEFAULT_CHARS
236            .iter()
237            .copied()
238            .map(|c| (self.0[c as usize], c))
239            .collect::<Vec<_>>();
240
241        arr.sort_by_key(|&(freq, _)| Reverse(freq));
242
243        let mut digits = Vec::with_capacity(10);
244        let mut alpha = Vec::with_capacity(54);
245        let mut all = Vec::with_capacity(64);
246
247        for (_, c) in arr {
248            if c.is_ascii_digit() {
249                digits.push(c);
250            } else {
251                alpha.push(c);
252            }
253        }
254        all.extend_from_slice(&alpha);
255        all.extend_from_slice(&digits);
256
257        #[cfg(feature = "debug")]
258        tracing::info!("Chars: {}", String::from_utf8_lossy(&all));
259
260        Base54Chars {
261            chars: all.try_into().unwrap(),
262        }
263    }
264}
265
266struct CharFreqAnalyzer {
267    freq: CharFreq,
268}
269
270impl AddAssign for CharFreq {
271    fn add_assign(&mut self, rhs: Self) {
272        for i in 0..256 {
273            self.0[i] += rhs.0[i];
274        }
275    }
276}
277
278impl Base54Chars {
279    /// givin a number, return a base54 encoded string
280    /// `usize -> [a-zA-Z$_][a-zA-Z$_0-9]*`
281    pub(crate) fn encode(&self, init: &mut usize, skip_reserved: bool) -> Atom {
282        let mut n = *init;
283
284        *init += 1;
285
286        let mut base = 54;
287
288        while n >= base {
289            n -= base;
290            base <<= 6;
291        }
292
293        // Not sure if this is ideal, but it's safe
294        let mut ret: ArrayVec<_, 14> = ArrayVec::new();
295
296        base /= 54;
297        let mut c = self.chars[n / base];
298        ret.push(c);
299
300        while base > 1 {
301            n %= base;
302            base >>= 6;
303            c = self.chars[n / base];
304
305            ret.push(c);
306        }
307
308        let s = unsafe {
309            // Safety: We are only using ascii characters
310            // Safety: The stack memory for ret is alive while creating Atom
311            Atom::from(std::str::from_utf8_unchecked(&ret))
312        };
313
314        if skip_reserved
315            && (s.is_reserved()
316                || s.is_reserved_in_strict_bind()
317                || s.is_reserved_in_strict_mode(true))
318        {
319            return self.encode(init, skip_reserved);
320        }
321
322        s
323    }
324}