swc_ecma_fast_parser/lexer/
cursor.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
//! Byte-level cursor for fast input traversal
//!
//! This cursor operates directly on UTF-8 bytes for maximum performance.

use assume::assume;
use swc_common::BytePos;

use crate::util::unlikely;

/// High-performance cursor for traversing input bytes
///
/// The cursor borrows the input for `'a` and tracks its location as a `u32`
/// byte offset, so positions and lengths are limited to the `u32` range.
#[repr(C)] // Ensure predictable memory layout for better cache behavior
pub struct Cursor<'a> {
    /// Current position in bytes (an offset into `input`)
    pos: u32,

    /// Length of the input in bytes
    len: u32,

    /// Input source as bytes
    input: &'a [u8],
}

impl<'a> Cursor<'a> {
    /// Create a new cursor positioned at the start of `input`.
    ///
    /// # Panics
    ///
    /// Panics if `input` is longer than `u32::MAX` bytes, because positions
    /// are tracked as `u32` and a longer input cannot be addressed.
    #[inline(always)]
    pub fn new(input: &'a str) -> Self {
        let bytes = input.as_bytes();
        // A plain `as u32` cast would silently truncate the length and make the
        // cursor see only part of an oversized input, so reject it up front.
        assert!(
            bytes.len() <= u32::MAX as usize,
            "Cursor input must not exceed u32::MAX bytes"
        );
        Self {
            input: bytes,
            pos: 0,
            // Lossless: guaranteed to fit by the assertion above.
            len: bytes.len() as u32,
        }
    }

    /// Get the current position as BytePos
    #[inline(always)]
    pub fn pos(&self) -> BytePos {
        BytePos(self.pos)
    }

    /// Check if the cursor is at (or past) the end of the input
    #[inline(always)]
    pub fn is_eof(&self) -> bool {
        self.pos >= self.len
    }

    /// Peek at the current byte without advancing.
    ///
    /// Returns `None` at EOF.
    #[inline(always)]
    pub fn peek(&self) -> Option<u8> {
        if unlikely(self.is_eof()) {
            None
        } else {
            // SAFETY: `is_eof()` was false, so `pos < len`, and `len` is the
            // length of `input`.
            Some(unsafe { *self.input.get_unchecked(self.pos as usize) })
        }
    }

    /// Peek at a byte at a specific offset from the current position.
    ///
    /// Returns `None` when the target lies at or beyond the end of the input,
    /// including when `pos + offset` does not fit in a `u32`.
    #[inline(always)]
    pub fn peek_at(&self, offset: u32) -> Option<u8> {
        // `checked_add` keeps a huge offset from wrapping around and aliasing
        // an earlier byte (or panicking in overflow-checked builds).
        let target_pos = self.pos.checked_add(offset)?;
        if unlikely(target_pos >= self.len) {
            None
        } else {
            // SAFETY: We've checked that target_pos < len
            Some(unsafe { *self.input.get_unchecked(target_pos as usize) })
        }
    }

    /// Peek at up to `n` bytes without advancing.
    ///
    /// The returned slice is shorter than `n` when fewer bytes remain, and
    /// empty when the cursor is at or past the end of the input.
    #[inline(always)]
    pub fn peek_n(&self, n: u32) -> &'a [u8] {
        // Clamp both ends: `reset_to` does not validate its argument, and
        // `pos + n` may exceed the `u32` range.
        let start = self.pos.min(self.len);
        let end = self.pos.saturating_add(n).min(self.len);
        // SAFETY: `start <= end` because `pos <= pos.saturating_add(n)` and
        // `min` is monotonic; `end <= len`, the length of `input`.
        unsafe { self.input.get_unchecked(start as usize..end as usize) }
    }

    /// Advance the cursor by one byte.
    ///
    /// The caller must ensure the cursor is not at EOF. This precondition is
    /// handed to the optimizer through `assume!(unsafe: ..)` instead of being
    /// checked with a branch.
    // NOTE(review): per the `assume` crate docs a violated assumption panics
    // in debug builds and is undefined behavior in release builds — confirm
    // against the crate version in use.
    #[inline(always)]
    pub fn advance(&mut self) {
        assume!(unsafe: !self.is_eof());
        self.pos += 1;
    }

    /// Advance the cursor by n bytes.
    ///
    /// The caller must ensure that at least `n` bytes remain, i.e. that
    /// `pos + n <= len`. As with [`Cursor::advance`], the precondition is
    /// assumed rather than checked.
    #[inline(always)]
    pub fn advance_n(&mut self, n: u32) {
        assume!(unsafe: self.pos + n <= self.len);
        self.pos += n;
    }

    /// Advance until the predicate returns false or EOF is reached.
    ///
    /// Returns the number of bytes that were skipped.
    #[inline]
    pub fn advance_while<F>(&mut self, predicate: F) -> u32
    where
        F: Fn(u8) -> bool,
    {
        let start = self.pos;

        self.advance_while_scalar(&predicate);

        self.pos - start
    }

    /// Scalar (non-SIMD) implementation of advance_while
    #[inline]
    fn advance_while_scalar<F>(&mut self, predicate: &F)
    where
        F: Fn(u8) -> bool,
    {
        // Warning: Do not scalarize if we do not use SIMD
        //
        // A hand-written 32-byte batching loop used to sit here in
        // commented-out form; it was removed as dead code. Without SIMD the
        // plain byte-by-byte loop below is the intended implementation.
        while let Some(byte) = self.peek() {
            if !predicate(byte) {
                break;
            }
            // `peek` returned `Some`, so the cursor is not at EOF.
            self.advance();
        }
    }

    /// Get slice from the current position to the end.
    ///
    /// Returns an empty slice when the cursor is at or past the end.
    #[inline(always)]
    pub fn rest(&self) -> &'a [u8] {
        // `reset_to` accepts arbitrary positions, so clamp instead of trusting
        // `pos <= len`.
        let start = self.pos.min(self.len) as usize;
        // SAFETY: `start <= len`, the length of `input`.
        unsafe { self.input.get_unchecked(start..) }
    }

    /// Get a slice of the input without bounds checking.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `start <= end <= self.len`.
    #[inline(always)]
    pub unsafe fn slice_unchecked(&self, start: u32, end: u32) -> &'a [u8] {
        assume!(unsafe: start <= end);
        assume!(unsafe: end <= self.len);
        self.input.get_unchecked(start as usize..end as usize)
    }

    /// Get the current position as a raw byte offset
    #[inline(always)]
    pub fn position(&self) -> u32 {
        self.pos
    }

    /// Reset the cursor to a specific position.
    ///
    /// The position is stored as-is without a bounds check. A position past
    /// the end of the input reads as EOF (`is_eof`, `peek`, `peek_at`) and
    /// yields empty slices from `rest` and `peek_n`; `advance`/`advance_n`
    /// must not be called in that state.
    #[inline(always)]
    pub fn reset_to(&mut self, pos: BytePos) {
        self.pos = pos.0;
    }
}