// swc_ecma_fast_parser/lexer/cursor.rs
//! Byte-level cursor for fast input traversal
//!
//! This cursor operates directly on UTF-8 bytes for maximum performance.
use assume::assume;
use swc_common::BytePos;
use crate::util::unlikely;
/// High-performance cursor for traversing input bytes.
///
/// Borrows the input for `'a` and tracks a byte offset into it. Offsets are
/// stored as `u32`, the same width that `pos()` wraps in a `BytePos`.
#[repr(C)] // Ensure predictable memory layout for better cache behavior
pub struct Cursor<'a> {
/// Current position in bytes (an offset into `input`)
pos: u32,
/// Length of the input in bytes, as computed by `new` (`input.len() as u32`)
len: u32,
/// Input source as bytes
input: &'a [u8],
}
impl<'a> Cursor<'a> {
    /// Create a new cursor positioned at the start of `input`.
    ///
    /// Offsets are tracked as `u32`, so `input` is expected to be shorter
    /// than 4 GiB. A longer input trips a debug assertion; without debug
    /// assertions the stored length is the truncated value.
    #[inline(always)]
    pub fn new(input: &'a str) -> Self {
        let bytes = input.as_bytes();
        debug_assert!(
            bytes.len() <= u32::MAX as usize,
            "cursor input is too large to be addressed with u32 offsets"
        );
        Self {
            input: bytes,
            pos: 0,
            // Truncating cast (guarded by the debug assertion above). A
            // truncated length is never larger than `bytes.len()`, so the
            // unchecked accesses below stay inside the slice either way.
            len: bytes.len() as u32,
        }
    }

    /// Get the current position as a `BytePos`.
    #[inline(always)]
    pub fn pos(&self) -> BytePos {
        BytePos(self.pos)
    }

    /// Check whether the cursor is at (or past) the end of the input.
    #[inline(always)]
    pub fn is_eof(&self) -> bool {
        self.pos >= self.len
    }

    /// Peek at the current byte without advancing.
    ///
    /// Returns `None` at end of input.
    #[inline(always)]
    pub fn peek(&self) -> Option<u8> {
        if unlikely(self.is_eof()) {
            None
        } else {
            // SAFETY: `is_eof()` was false, so `pos < len <= input.len()`.
            Some(unsafe { *self.input.get_unchecked(self.pos as usize) })
        }
    }

    /// Peek at the byte `offset` bytes ahead of the current position.
    ///
    /// Returns `None` when the target lies at or beyond the end of the
    /// input, including when `pos + offset` does not fit in a `u32`.
    #[inline(always)]
    pub fn peek_at(&self, offset: u32) -> Option<u8> {
        // `checked_add` keeps a huge `offset` from wrapping around to a
        // small, in-bounds (but wrong) position.
        let target_pos = self.pos.checked_add(offset)?;
        if unlikely(target_pos >= self.len) {
            None
        } else {
            // SAFETY: `target_pos < len <= input.len()` was checked above.
            Some(unsafe { *self.input.get_unchecked(target_pos as usize) })
        }
    }

    /// Peek at up to `n` bytes without advancing.
    ///
    /// The returned slice is shorter than `n` (possibly empty) when fewer
    /// than `n` bytes remain.
    #[inline(always)]
    pub fn peek_n(&self, n: u32) -> &[u8] {
        // Saturate instead of wrapping: a wrapped sum could yield
        // `end < pos`, which would make the unchecked range below invalid.
        let end = self.pos.saturating_add(n).min(self.len);
        // SAFETY: `pos <= len` is the cursor invariant, so
        // `pos <= end <= len <= input.len()`.
        unsafe { self.input.get_unchecked(self.pos as usize..end as usize) }
    }

    /// Advance the cursor by one byte.
    ///
    /// The cursor must not be at end of input. This precondition is stated
    /// through `assume!`, so it is an optimizer assumption rather than a
    /// runtime check in optimized builds; callers should test `peek()` or
    /// `is_eof()` first.
    #[inline(always)]
    pub fn advance(&mut self) {
        assume!(unsafe: !self.is_eof());
        self.pos += 1;
    }

    /// Advance the cursor by `n` bytes.
    ///
    /// `pos + n` must not exceed the input length. As with [`Self::advance`],
    /// the precondition is stated through `assume!`.
    #[inline(always)]
    pub fn advance_n(&mut self, n: u32) {
        assume!(unsafe: self.pos + n <= self.len);
        self.pos += n;
    }

    /// Advance while `predicate` returns `true` for the current byte, or
    /// until end of input is reached.
    ///
    /// Returns the number of bytes consumed.
    #[inline]
    pub fn advance_while<F>(&mut self, predicate: F) -> u32
    where
        F: Fn(u8) -> bool,
    {
        let start = self.pos;
        self.advance_while_scalar(&predicate);
        self.pos - start
    }

    /// Scalar (non-SIMD) implementation of `advance_while`.
    #[inline]
    fn advance_while_scalar<F>(&mut self, predicate: &F)
    where
        F: Fn(u8) -> bool,
    {
        // Intentionally a plain byte-at-a-time loop: a manually batched
        // variant was left disabled here with a warning not to batch unless
        // the batching is backed by SIMD.
        while let Some(byte) = self.peek() {
            if !predicate(byte) {
                break;
            }
            // `peek()` returned `Some`, so the cursor is not at EOF.
            self.advance();
        }
    }

    /// Get the slice from the current position to the end of the input.
    #[inline(always)]
    pub fn rest(&self) -> &'a [u8] {
        // SAFETY: `pos <= len <= input.len()`; `reset_to` clamps to `len`
        // and `advance`/`advance_n` require staying within `len`.
        unsafe { self.input.get_unchecked(self.pos as usize..) }
    }

    /// Get a slice of the input without bounds checking.
    ///
    /// # Safety
    ///
    /// The caller must ensure that `start <= end <= self.len`.
    #[inline(always)]
    pub unsafe fn slice_unchecked(&self, start: u32, end: u32) -> &'a [u8] {
        assume!(unsafe: start <= end);
        assume!(unsafe: end <= self.len);
        self.input.get_unchecked(start as usize..end as usize)
    }

    /// Get the current position as a raw byte offset.
    #[inline(always)]
    pub fn position(&self) -> u32 {
        self.pos
    }

    /// Reset the cursor to a specific position.
    ///
    /// `pos` is expected to be a position previously obtained from this
    /// cursor. A position past the end of the input trips a debug assertion
    /// and is otherwise clamped to the end, so the `pos <= len` invariant
    /// that `rest()` and `peek_n()` rely on cannot be broken from safe code.
    #[inline(always)]
    pub fn reset_to(&mut self, pos: BytePos) {
        debug_assert!(
            pos.0 <= self.len,
            "reset_to position is past the end of the input"
        );
        self.pos = pos.0.min(self.len);
    }
}