// swc_ecma_fast_parser/lexer/number.rs

//! Number literal processing for the lexer
//!
//! This module handles the parsing of numeric literals in
//! ECMAScript/TypeScript.

use std::borrow::Cow;

use swc_atoms::Atom;

use super::Lexer;
use crate::{
    error::{Error, ErrorKind, Result},
    token::{Token, TokenType, TokenValue},
};

/// Byte-indexed lookup table mapping an ASCII digit to its numeric value.
///
/// `b'0'..=b'9'` map to 0–9, and `b'a'..=b'f'` / `b'A'..=b'F'` map to 10–15.
/// Every other byte maps to the sentinel 255 ("not a digit").
static DIGIT_VALUES: [u8; 256] = {
    let mut values = [255u8; 256];

    // Single pass over all byte values, evaluated at compile time.
    let mut index = 0usize;
    while index < 256 {
        let byte = index as u8;
        values[index] = match byte {
            b'0'..=b'9' => byte - b'0',
            b'a'..=b'f' => byte - b'a' + 10,
            b'A'..=b'F' => byte - b'A' + 10,
            _ => 255,
        };
        index += 1;
    }

    values
};

impl<'a> Lexer<'a> {
    /// Read a numeric literal
    #[inline]
    pub(super) fn read_number(&mut self) -> Result<Token> {
        let start_pos = self.start_pos;
        let start_idx = start_pos.0;

        // Check for leading dot (e.g. .123)
        let starts_with_dot = self.cursor.peek() == Some(b'.');
        if starts_with_dot {
            self.cursor.advance();

            // Make sure it's followed by a digit
            if !matches!(self.cursor.peek(), Some(b'0'..=b'9')) {
                // Just a dot, not a number
                return Ok(Token::new(
                    TokenType::Dot,
                    self.span(),
                    bool::from(self.had_line_break),
                    TokenValue::None,
                ));
            }

            // Read digits after the dot
            self.cursor
                .advance_while(|ch| matches!(ch, b'0'..=b'9' | b'_'));

            // Read exponent if present
            if matches!(self.cursor.peek(), Some(b'e') | Some(b'E')) {
                self.cursor.advance();

                // Optional sign
                if matches!(self.cursor.peek(), Some(b'+') | Some(b'-')) {
                    self.cursor.advance();
                }

                // Must have at least one digit in exponent
                if !matches!(self.cursor.peek(), Some(b'0'..=b'9')) {
                    let span = self.span();
                    return Err(Error {
                        kind: ErrorKind::InvalidNumber {
                            reason: "invalid numeric separator",
                        },
                        span,
                    });
                }

                // Read exponent digits
                self.cursor
                    .advance_while(|ch| matches!(ch, b'0'..=b'9' | b'_'));
            }

            // Parse as decimal
            let value = self.parse_decimal_number(start_idx, true);

            // Create the token
            let raw_str = self.extract_number_str(start_idx);
            let span = self.span();
            return Ok(Token::new(
                TokenType::Num,
                span,
                bool::from(self.had_line_break),
                TokenValue::Num {
                    value,
                    raw: Atom::from(raw_str),
                },
            ));
        }

        // First check for a binary, octal, or hex literal
        let mut is_binary = false;
        let mut is_octal = false;
        let mut is_hex = false;

        if !starts_with_dot && self.cursor.peek() == Some(b'0') {
            self.cursor.advance();

            match self.cursor.peek() {
                // Binary literal: 0b or 0B
                Some(b'b') | Some(b'B') => {
                    self.cursor.advance();
                    is_binary = true;

                    // Must have at least one binary digit
                    if !matches!(self.cursor.peek(), Some(b'0'..=b'1')) {
                        let span = self.span();
                        return Err(Error {
                            kind: ErrorKind::InvalidNumber {
                                reason: "expected binary digit",
                            },
                            span,
                        });
                    }
                }
                // Octal literal: 0o or 0O
                Some(b'o') | Some(b'O') => {
                    self.cursor.advance();
                    is_octal = true;

                    // Must have at least one octal digit
                    if !matches!(self.cursor.peek(), Some(b'0'..=b'7')) {
                        let span = self.span();
                        return Err(Error {
                            kind: ErrorKind::InvalidNumber {
                                reason: "expected octal digit",
                            },
                            span,
                        });
                    }
                }
                // Hex literal: 0x or 0X
                Some(b'x') | Some(b'X') => {
                    self.cursor.advance();
                    is_hex = true;

                    // Must have at least one hex digit
                    if !matches!(
                        self.cursor.peek(),
                        Some(b'0'..=b'9') | Some(b'a'..=b'f') | Some(b'A'..=b'F')
                    ) {
                        let span = self.span();
                        return Err(Error {
                            kind: ErrorKind::InvalidNumber {
                                reason: "expected hex digit",
                            },
                            span,
                        });
                    }
                }
                // Decimal literal starting with 0
                _ => {}
            }
        }

        // Read the rest of the digits
        if is_binary {
            // Binary literals: 0b[01]+
            self.cursor
                .advance_while(|ch| matches!(ch, b'0'..=b'1' | b'_'));
        } else if is_octal {
            // Octal literals: 0o[0-7]+
            self.cursor
                .advance_while(|ch| matches!(ch, b'0'..=b'7' | b'_'));
        } else if is_hex {
            // Hex literals: 0x[0-9a-fA-F]+
            self.cursor
                .advance_while(|ch| matches!(ch, b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F' | b'_'));
        } else {
            // Decimal literals

            // Read integer part
            if !starts_with_dot {
                self.cursor
                    .advance_while(|ch| matches!(ch, b'0'..=b'9' | b'_'));
            }

            // Read fractional part if present
            if self.cursor.peek() == Some(b'.')
                && (starts_with_dot || !matches!(self.cursor.peek_at(1), Some(b'.')))
            {
                // Consume the dot
                self.cursor.advance();

                // Read decimal digits after the dot
                self.cursor
                    .advance_while(|ch| matches!(ch, b'0'..=b'9' | b'_'));
            }

            // Read exponent part if present
            if matches!(self.cursor.peek(), Some(b'e') | Some(b'E')) {
                self.cursor.advance();

                // Optional sign
                if matches!(self.cursor.peek(), Some(b'+') | Some(b'-')) {
                    self.cursor.advance();
                }

                // Must have at least one digit in exponent
                if !matches!(self.cursor.peek(), Some(b'0'..=b'9')) {
                    let span = self.span();
                    return Err(Error {
                        kind: ErrorKind::InvalidNumber {
                            reason: "invalid numeric separator",
                        },
                        span,
                    });
                }

                // Read exponent digits
                self.cursor
                    .advance_while(|ch| matches!(ch, b'0'..=b'9' | b'_'));
            }
        }

        // Check if this is a BigInt literal (ends with n)
        let is_bigint = self.cursor.peek() == Some(b'n');
        if is_bigint {
            self.cursor.advance(); // Consume the 'n'

            // BigInt can't have decimal points or exponents
            if !is_binary && !is_octal && !is_hex {
                let raw_str = self.extract_number_str(start_idx);
                if raw_str.contains('.') || raw_str.contains('e') || raw_str.contains('E') {
                    let span = self.span();
                    return Err(Error {
                        kind: ErrorKind::InvalidBigInt,
                        span,
                    });
                }
            }

            return self.create_bigint_token(start_idx);
        }

        // Parse the number directly for faster processing
        let value = if is_binary {
            self.parse_binary_number(start_idx)
        } else if is_octal {
            self.parse_octal_number(start_idx)
        } else if is_hex {
            self.parse_hex_number(start_idx)
        } else {
            self.parse_decimal_number(start_idx, starts_with_dot)
        };

        // Extract the raw string representation
        let raw_str = self.extract_number_str(start_idx);

        // Create and return the token
        let span = self.span();
        Ok(Token::new(
            TokenType::Num,
            span,
            bool::from(self.had_line_break),
            TokenValue::Num {
                value,
                raw: Atom::from(raw_str),
            },
        ))
    }

    /// Return the text between `start_idx` and the current cursor position
    /// with every `_` separator removed.
    ///
    /// Borrows from the source when no separator is present; otherwise builds
    /// an owned copy without the underscores.
    #[inline]
    fn extract_number_str(&self, start_idx: u32) -> Cow<'a, str> {
        // SAFETY: relies on `slice_unchecked`'s contract (not visible in this
        // file); assumes `start_idx..position()` lies within the source buffer.
        let bytes = unsafe {
            self.cursor
                .slice_unchecked(start_idx, self.cursor.position())
        };

        // Fast path: nothing to strip, borrow the source directly.
        if !bytes.contains(&b'_') {
            // SAFETY: assumes the scanned literal consists only of ASCII bytes
            // (digits, letters, `.`, sign) — confirm against the callers.
            return Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(bytes) });
        }

        let mut stripped = String::with_capacity(bytes.len());
        stripped.extend(
            bytes
                .iter()
                .filter(|&&byte| byte != b'_')
                .map(|&byte| byte as char),
        );
        Cow::Owned(stripped)
    }

    /// Parse a binary number (0b...)
    ///
    /// `start_idx` is the offset of the leading `0`; the digits run up to the
    /// current cursor position.
    #[inline]
    fn parse_binary_number(&self, start_idx: u32) -> f64 {
        self.parse_prefixed_integer(start_idx, 2)
    }

    /// Parse an octal number (0o...)
    ///
    /// `start_idx` is the offset of the leading `0`; the digits run up to the
    /// current cursor position.
    #[inline]
    fn parse_octal_number(&self, start_idx: u32) -> f64 {
        self.parse_prefixed_integer(start_idx, 8)
    }

    /// Parse a hexadecimal number (0x...)
    ///
    /// `start_idx` is the offset of the leading `0`; the digits run up to the
    /// current cursor position.
    #[inline]
    fn parse_hex_number(&self, start_idx: u32) -> f64 {
        self.parse_prefixed_integer(start_idx, 16)
    }

    /// Convert the digits of a radix-prefixed literal to `f64`.
    ///
    /// Skips the two-byte prefix at `start_idx`, ignores `_` separators, and
    /// reads digits up to the current cursor position. Digits are accumulated
    /// exactly in a `u64` for as long as the value fits. If it would overflow,
    /// accumulation continues in `f64`, so very long literals yield a large
    /// finite value (or infinity) instead of panicking or wrapping.
    fn parse_prefixed_integer(&self, start_idx: u32, radix: u32) -> f64 {
        let start = start_idx + 2; // Skip the `0b` / `0o` / `0x` prefix
        let end = self.cursor.position();

        // SAFETY: relies on `slice_unchecked`'s contract (not visible in this
        // file); assumes the caller has already scanned the prefix and at least
        // one digit, so `start..end` lies within the source buffer.
        let digits = unsafe { self.cursor.slice_unchecked(start, end) };

        let radix_int = u64::from(radix);
        let radix_float = f64::from(radix);

        let mut exact: u64 = 0;
        let mut approx: f64 = 0.0;
        let mut overflowed = false;

        for &byte in digits {
            if byte == b'_' {
                continue;
            }
            let digit = DIGIT_VALUES[byte as usize];

            if !overflowed {
                match exact
                    .checked_mul(radix_int)
                    .and_then(|scaled| scaled.checked_add(u64::from(digit)))
                {
                    Some(next) => {
                        exact = next;
                        continue;
                    }
                    None => {
                        // Value no longer fits in 64 bits: switch to floating
                        // point, seeded with the exact prefix seen so far.
                        overflowed = true;
                        approx = exact as f64;
                    }
                }
            }

            // Beyond 64 bits each step rounds, so the result may differ from
            // the correctly rounded value in the last bit.
            approx = approx * radix_float + f64::from(digit);
        }

        if overflowed {
            approx
        } else {
            exact as f64
        }
    }

    /// Parse a decimal number
    ///
    /// Reads the literal text from `start_idx` to the current cursor position
    /// (with `_` separators removed) and converts it with the standard
    /// library's `f64` parser. Returns `f64::NAN` if that parser rejects the
    /// text.
    ///
    /// The second argument says whether the literal starts with a dot (`.5`).
    /// It is kept for call-site compatibility but needs no special handling:
    /// `str::parse::<f64>` accepts a missing integer part and yields the same
    /// value as the `0`-prefixed form.
    #[inline]
    fn parse_decimal_number(&self, start_idx: u32, _starts_with_dot: bool) -> f64 {
        self.extract_number_str(start_idx)
            .parse::<f64>()
            .unwrap_or(f64::NAN)
    }

    /// Create a BigInt token
    #[inline]
    fn create_bigint_token(&self, start_idx: u32) -> Result<Token> {
        use num_bigint::BigInt;

        let end_idx = self.cursor.position();
        let span = self.span();

        // Extract the raw string excluding the 'n' suffix
        let raw_str = {
            let num_slice = unsafe { self.cursor.slice_unchecked(start_idx, end_idx - 1) };
            if num_slice.contains(&b'_') {
                // Filter out underscores
                let mut result = String::with_capacity(num_slice.len());
                for &byte in num_slice {
                    if byte != b'_' {
                        result.push(byte as char);
                    }
                }
                Cow::Owned(result)
            } else {
                // Fast path: no underscores
                Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(num_slice) })
            }
        };

        // Parse the BigInt value
        let value = if raw_str.starts_with("0b") || raw_str.starts_with("0B") {
            // Binary
            BigInt::parse_bytes(&raw_str.as_bytes()[2..], 2)
        } else if raw_str.starts_with("0o") || raw_str.starts_with("0O") {
            // Octal
            BigInt::parse_bytes(&raw_str.as_bytes()[2..], 8)
        } else if raw_str.starts_with("0x") || raw_str.starts_with("0X") {
            // Hexadecimal
            BigInt::parse_bytes(&raw_str.as_bytes()[2..], 16)
        } else {
            // Decimal
            BigInt::parse_bytes(raw_str.as_bytes(), 10)
        };

        // Create the token
        if let Some(value) = value {
            Ok(Token::new(
                TokenType::BigInt,
                span,
                bool::from(self.had_line_break),
                TokenValue::BigInt {
                    value: Box::new(value),
                    raw: Atom::from(raw_str),
                },
            ))
        } else {
            Err(Error {
                kind: ErrorKind::InvalidBigInt,
                span,
            })
        }
    }
}