swc_ecma_minifier/util/
base54.rs1use std::{cmp::Reverse, io, ops::AddAssign};
2
3use arrayvec::ArrayVec;
4use swc_atoms::Atom;
5use swc_common::{
6 sync::Lrc, BytePos, FileLines, FileName, Loc, SourceMapper, Span, SpanLinesError,
7};
8use swc_ecma_ast::*;
9use swc_ecma_codegen::{text_writer::WriteJs, Emitter};
10
/// Table of 256 signed counters, one per possible byte value.
///
/// The counters are signed because [`CharFreq::scan`] accepts a negative
/// `delta`, so an entry may drop below zero.
#[derive(Clone, Copy)]
pub(crate) struct CharFreq([i32; 256]);
14
/// Character table returned by [`CharFreq::compile`] and read by
/// [`Base54Chars::encode`].
#[derive(Clone, Copy)]
pub(crate) struct Base54Chars {
    /// The 64 candidate bytes. As built by `CharFreq::compile`, the 54
    /// non-digit characters come first and the 10 digits last.
    chars: [u8; 64],
}
19
20impl Default for CharFreq {
21 fn default() -> Self {
22 CharFreq([0; 256])
23 }
24}
25
26struct DummySourceMap;
27
28impl SourceMapper for DummySourceMap {
29 fn lookup_char_pos(&self, _: BytePos) -> Loc {
30 unreachable!()
31 }
32
33 fn span_to_lines(&self, _: Span) -> Result<FileLines, Box<SpanLinesError>> {
34 unreachable!()
35 }
36
37 fn span_to_string(&self, _: Span) -> String {
38 String::new()
39 }
40
41 fn span_to_filename(&self, _: Span) -> Lrc<FileName> {
42 FileName::Anon.into()
43 }
44
45 fn merge_spans(&self, _: Span, _: Span) -> Option<Span> {
46 None
47 }
48
49 fn call_span_if_macro(&self, sp: Span) -> Span {
50 sp
51 }
52
53 fn doctest_offset_line(&self, line: usize) -> usize {
54 line
55 }
56
57 fn span_to_snippet(&self, _: Span) -> Result<String, Box<swc_common::SpanSnippetError>> {
58 Ok(String::new())
59 }
60}
61
62impl SourceMapperExt for DummySourceMap {
63 fn get_code_map(&self) -> &dyn SourceMapper {
64 self
65 }
66}
67
68impl CharFreq {
69 #[inline(always)]
70 fn write(&mut self, data: &str) -> io::Result<()> {
71 self.scan(data, 1);
72 Ok(())
73 }
74}
75
76impl WriteJs for CharFreq {
77 #[inline(always)]
78 fn increase_indent(&mut self) -> io::Result<()> {
79 Ok(())
80 }
81
82 #[inline(always)]
83 fn decrease_indent(&mut self) -> io::Result<()> {
84 Ok(())
85 }
86
87 #[inline(always)]
88 fn write_semi(&mut self, _: Option<Span>) -> io::Result<()> {
89 Ok(())
90 }
91
92 #[inline(always)]
93 fn write_space(&mut self) -> io::Result<()> {
94 Ok(())
95 }
96
97 #[inline(always)]
98 fn write_keyword(&mut self, _: Option<Span>, s: &'static str) -> io::Result<()> {
99 self.write(s)?;
100 Ok(())
101 }
102
103 #[inline(always)]
104 fn write_operator(&mut self, _: Option<Span>, s: &str) -> io::Result<()> {
105 self.write(s)?;
106 Ok(())
107 }
108
109 #[inline(always)]
110 fn write_param(&mut self, s: &str) -> io::Result<()> {
111 self.write(s)?;
112 Ok(())
113 }
114
115 #[inline(always)]
116 fn write_property(&mut self, s: &str) -> io::Result<()> {
117 self.write(s)?;
118 Ok(())
119 }
120
121 #[inline(always)]
122 fn write_line(&mut self) -> io::Result<()> {
123 Ok(())
124 }
125
126 #[inline(always)]
127 fn write_lit(&mut self, _: Span, s: &str) -> io::Result<()> {
128 self.write(s)?;
129
130 Ok(())
131 }
132
133 #[inline(always)]
134 fn write_comment(&mut self, s: &str) -> io::Result<()> {
135 self.write(s)?;
136
137 Ok(())
138 }
139
140 #[inline(always)]
141 fn write_str_lit(&mut self, _: Span, s: &str) -> io::Result<()> {
142 self.write(s)?;
143
144 Ok(())
145 }
146
147 #[inline(always)]
148 fn write_str(&mut self, s: &str) -> io::Result<()> {
149 self.write(s)?;
150 Ok(())
151 }
152
153 #[inline(always)]
154 fn write_symbol(&mut self, _: Span, s: &str) -> io::Result<()> {
155 self.write(s)?;
156 Ok(())
157 }
158
159 #[inline(always)]
160 fn write_punct(&mut self, _: Option<Span>, s: &'static str) -> io::Result<()> {
161 self.write(s)?;
162 Ok(())
163 }
164
165 #[inline(always)]
166 fn care_about_srcmap(&self) -> bool {
167 false
168 }
169
170 #[inline(always)]
171 fn add_srcmap(&mut self, _: BytePos) -> io::Result<()> {
172 Ok(())
173 }
174
175 #[inline(always)]
176 fn commit_pending_semi(&mut self) -> io::Result<()> {
177 Ok(())
178 }
179
180 #[inline(always)]
181 fn can_ignore_invalid_unicodes(&mut self) -> bool {
182 true
183 }
184}
185
186impl CharFreq {
187 pub fn scan(&mut self, s: &str, delta: i32) {
188 if delta == 0 {
189 return;
190 }
191
192 for &c in s.as_bytes() {
193 self.0[c as usize] += delta;
194 }
195 }
196
197 pub fn compute(p: &Program, idents: &Vec<Atom>) -> Self {
198 let mut a = {
199 let cm = Lrc::new(DummySourceMap);
200 let mut freq = Self::default();
201
202 {
203 let mut emitter = Emitter {
204 cfg: swc_ecma_codegen::Config::default()
205 .with_target(EsVersion::latest())
206 .with_minify(true),
207 cm,
208 comments: None,
209 wr: &mut freq,
210 };
211
212 emitter.emit_program(p).unwrap();
213 }
214
215 freq
216 };
217
218 let mut analyzer = CharFreqAnalyzer {
219 freq: Default::default(),
220 };
221
222 for ident in idents {
223 analyzer.freq.scan(ident, -1);
224 }
225
226 a += analyzer.freq;
227
228 a
229 }
230
231 pub fn compile(self) -> Base54Chars {
232 static BASE54_DEFAULT_CHARS: &[u8; 64] =
233 b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789$_";
234
235 let mut arr = BASE54_DEFAULT_CHARS
236 .iter()
237 .copied()
238 .map(|c| (self.0[c as usize], c))
239 .collect::<Vec<_>>();
240
241 arr.sort_by_key(|&(freq, _)| Reverse(freq));
242
243 let mut digits = Vec::with_capacity(10);
244 let mut alpha = Vec::with_capacity(54);
245 let mut all = Vec::with_capacity(64);
246
247 for (_, c) in arr {
248 if c.is_ascii_digit() {
249 digits.push(c);
250 } else {
251 alpha.push(c);
252 }
253 }
254 all.extend_from_slice(&alpha);
255 all.extend_from_slice(&digits);
256
257 #[cfg(feature = "debug")]
258 tracing::info!("Chars: {}", String::from_utf8_lossy(&all));
259
260 Base54Chars {
261 chars: all.try_into().unwrap(),
262 }
263 }
264}
265
266struct CharFreqAnalyzer {
267 freq: CharFreq,
268}
269
270impl AddAssign for CharFreq {
271 fn add_assign(&mut self, rhs: Self) {
272 for i in 0..256 {
273 self.0[i] += rhs.0[i];
274 }
275 }
276}
277
278impl Base54Chars {
279 pub(crate) fn encode(&self, init: &mut usize, skip_reserved: bool) -> Atom {
282 let mut n = *init;
283
284 *init += 1;
285
286 let mut base = 54;
287
288 while n >= base {
289 n -= base;
290 base <<= 6;
291 }
292
293 let mut ret: ArrayVec<_, 14> = ArrayVec::new();
295
296 base /= 54;
297 let mut c = self.chars[n / base];
298 ret.push(c);
299
300 while base > 1 {
301 n %= base;
302 base >>= 6;
303 c = self.chars[n / base];
304
305 ret.push(c);
306 }
307
308 let s = unsafe {
309 Atom::from(std::str::from_utf8_unchecked(&ret))
312 };
313
314 if skip_reserved
315 && (s.is_reserved()
316 || s.is_reserved_in_strict_bind()
317 || s.is_reserved_in_strict_mode(true))
318 {
319 return self.encode(init, skip_reserved);
320 }
321
322 s
323 }
324}