swc_common/
input.rs

1use std::str;
2
3use debug_unreachable::debug_unreachable;
4
5use crate::syntax_pos::{BytePos, SourceFile};
6
7pub type SourceFileInput<'a> = StringInput<'a>;
8
9/// Implementation of [Input].
10#[derive(Clone)]
11pub struct StringInput<'a> {
12    last_pos: BytePos,
13    /// Current cursor
14    iter: str::Chars<'a>,
15    orig: &'a str,
16    /// Original start position.
17    orig_start: BytePos,
18    orig_end: BytePos,
19}
20
21impl<'a> StringInput<'a> {
22    /// `start` and `end` can be arbitrary value, but start should be less than
23    /// or equal to end.
24    ///
25    ///
26    /// `swc` get this value from [SourceMap] because code generator depends on
27    /// some methods of [SourceMap].
28    /// If you are not going to use methods from
29    /// [SourceMap], you may use any value.
30    pub fn new(src: &'a str, start: BytePos, end: BytePos) -> Self {
31        assert!(start <= end);
32
33        StringInput {
34            last_pos: start,
35            orig: src,
36            iter: src.chars(),
37            orig_start: start,
38            orig_end: end,
39        }
40    }
41
42    #[inline(always)]
43    pub fn as_str(&self) -> &str {
44        self.iter.as_str()
45    }
46
47    #[inline]
48    pub fn bump_bytes(&mut self, n: usize) {
49        unsafe {
50            // Safety: We only proceed, not go back.
51            self.reset_to(self.last_pos + BytePos(n as u32));
52        }
53    }
54
55    pub fn start_pos(&self) -> BytePos {
56        self.orig_start
57    }
58
59    pub fn end_pos(&self) -> BytePos {
60        self.orig_end
61    }
62}
63
64/// Creates an [Input] from [SourceFile]. This is an alias for
65///
66/// ```ignore
67///    StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
68/// ```
69impl<'a> From<&'a SourceFile> for StringInput<'a> {
70    fn from(fm: &'a SourceFile) -> Self {
71        StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
72    }
73}
74
75impl Input for StringInput<'_> {
76    #[inline]
77    fn cur(&self) -> Option<char> {
78        self.iter.clone().next()
79    }
80
81    #[inline]
82    fn peek(&self) -> Option<char> {
83        let mut iter = self.iter.clone();
84        // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster.
85        iter.next();
86        iter.next()
87    }
88
89    #[inline]
90    fn peek_ahead(&self) -> Option<char> {
91        let mut iter = self.iter.clone();
92        // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster
93        iter.next();
94        iter.next();
95        iter.next()
96    }
97
98    #[inline]
99    unsafe fn bump(&mut self) {
100        if let Some(c) = self.iter.next() {
101            self.last_pos = self.last_pos + BytePos((c.len_utf8()) as u32);
102        } else {
103            unsafe {
104                debug_unreachable!("bump should not be called when cur() == None");
105            }
106        }
107    }
108
109    #[inline]
110    fn cur_as_ascii(&self) -> Option<u8> {
111        let first_byte = *self.as_str().as_bytes().first()?;
112        if first_byte <= 0x7f {
113            Some(first_byte)
114        } else {
115            None
116        }
117    }
118
119    #[inline]
120    fn is_at_start(&self) -> bool {
121        self.orig_start == self.last_pos
122    }
123
124    /// TODO(kdy1): Remove this?
125    #[inline]
126    fn cur_pos(&self) -> BytePos {
127        self.last_pos
128    }
129
130    #[inline]
131    fn last_pos(&self) -> BytePos {
132        self.last_pos
133    }
134
135    #[inline]
136    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str {
137        debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}");
138        let s = self.orig;
139
140        let start_idx = (start - self.orig_start).0 as usize;
141        let end_idx = (end - self.orig_start).0 as usize;
142
143        debug_assert!(end_idx <= s.len());
144
145        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
146
147        self.iter = unsafe { s.get_unchecked(end_idx..) }.chars();
148        self.last_pos = end;
149
150        ret
151    }
152
153    #[inline]
154    fn uncons_while<F>(&mut self, mut pred: F) -> &str
155    where
156        F: FnMut(char) -> bool,
157    {
158        let s = self.iter.as_str();
159        let mut last = 0;
160
161        for (i, c) in s.char_indices() {
162            if pred(c) {
163                last = i + c.len_utf8();
164            } else {
165                break;
166            }
167        }
168        debug_assert!(last <= s.len());
169        let ret = unsafe { s.get_unchecked(..last) };
170
171        self.last_pos = self.last_pos + BytePos(last as _);
172        self.iter = unsafe { s.get_unchecked(last..) }.chars();
173
174        ret
175    }
176
177    fn find<F>(&mut self, mut pred: F) -> Option<BytePos>
178    where
179        F: FnMut(char) -> bool,
180    {
181        let s = self.iter.as_str();
182        let mut last = 0;
183
184        for (i, c) in s.char_indices() {
185            if pred(c) {
186                last = i + c.len_utf8();
187                break;
188            }
189        }
190        if last == 0 {
191            return None;
192        }
193
194        debug_assert!(last <= s.len());
195
196        self.last_pos = self.last_pos + BytePos(last as _);
197        self.iter = unsafe { s.get_unchecked(last..) }.chars();
198
199        Some(self.last_pos)
200    }
201
202    #[inline]
203    unsafe fn reset_to(&mut self, to: BytePos) {
204        let orig = self.orig;
205        let idx = (to - self.orig_start).0 as usize;
206
207        debug_assert!(idx <= orig.len());
208        let s = unsafe { orig.get_unchecked(idx..) };
209        self.iter = s.chars();
210        self.last_pos = to;
211    }
212
213    #[inline]
214    fn is_byte(&self, c: u8) -> bool {
215        self.iter
216            .as_str()
217            .as_bytes()
218            .first()
219            .map(|b| *b == c)
220            .unwrap_or(false)
221    }
222
223    #[inline]
224    fn is_str(&self, s: &str) -> bool {
225        self.as_str().starts_with(s)
226    }
227
228    #[inline]
229    fn eat_byte(&mut self, c: u8) -> bool {
230        if self.is_byte(c) {
231            self.iter.next();
232            self.last_pos = self.last_pos + BytePos(1_u32);
233            true
234        } else {
235            false
236        }
237    }
238}
239
240pub trait Input: Clone {
241    fn cur(&self) -> Option<char>;
242    fn peek(&self) -> Option<char>;
243    fn peek_ahead(&self) -> Option<char>;
244
245    /// # Safety
246    ///
247    /// This should be called only when `cur()` returns `Some`. i.e.
248    /// when the Input is not empty.
249    unsafe fn bump(&mut self);
250
251    /// Returns [None] if it's end of input **or** current character is not an
252    /// ascii character.
253    #[inline]
254    fn cur_as_ascii(&self) -> Option<u8> {
255        self.cur().and_then(|i| {
256            if i.is_ascii() {
257                return Some(i as u8);
258            }
259            None
260        })
261    }
262
263    fn is_at_start(&self) -> bool;
264
265    fn cur_pos(&self) -> BytePos;
266
267    fn last_pos(&self) -> BytePos;
268
269    /// # Safety
270    ///
271    /// - start should be less than or equal to end.
272    /// - start and end should be in the valid range of input.
273    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &str;
274
275    /// Takes items from stream, testing each one with predicate. returns the
276    /// range of items which passed predicate.
277    fn uncons_while<F>(&mut self, f: F) -> &str
278    where
279        F: FnMut(char) -> bool;
280
281    /// This method modifies [last_pos()] and [cur_pos()].
282    fn find<F>(&mut self, f: F) -> Option<BytePos>
283    where
284        F: FnMut(char) -> bool;
285
286    /// # Safety
287    ///
288    /// - `to` be in the valid range of input.
289    unsafe fn reset_to(&mut self, to: BytePos);
290
291    /// Implementors can override the method to make it faster.
292    ///
293    /// `c` must be ASCII.
294    #[inline]
295    #[allow(clippy::wrong_self_convention)]
296    fn is_byte(&self, c: u8) -> bool {
297        match self.cur() {
298            Some(ch) => ch == c as char,
299            _ => false,
300        }
301    }
302
303    /// Implementors can override the method to make it faster.
304    ///
305    /// `s` must be ASCII only.
306    fn is_str(&self, s: &str) -> bool;
307
308    /// Implementors can override the method to make it faster.
309    ///
310    /// `c` must be ASCII.
311    #[inline]
312    fn eat_byte(&mut self, c: u8) -> bool {
313        if self.is_byte(c) {
314            unsafe {
315                // Safety: We are sure that the input is not empty
316                self.bump();
317            }
318            true
319        } else {
320            false
321        }
322    }
323}
324
#[cfg(test)]
mod tests {
    use std::sync::Arc;

    use super::*;
    use crate::{FileName, FilePathMapping, SourceMap};

    /// Registers `src` as a source file in a fresh [SourceMap] and runs `f`
    /// with a [StringInput] over it.
    fn with_test_sess<F>(src: &str, f: F)
    where
        F: FnOnce(StringInput<'_>),
    {
        let cm = Arc::new(SourceMap::new(FilePathMapping::empty()));
        let fm = cm.new_source_file(FileName::Real("testing".into()).into(), src.into());

        f((&*fm).into())
    }

    #[test]
    fn src_input_slice_1() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(2)) }, "f");
            assert_eq!(i.last_pos, BytePos(2));
            assert_eq!(i.cur(), Some('o'));

            assert_eq!(unsafe { i.slice(BytePos(2), BytePos(4)) }, "oo");
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(4)) }, "foo");
            assert_eq!(i.last_pos, BytePos(4));
            assert_eq!(i.cur(), Some('/'));
        });
    }

    #[test]
    fn src_input_reset_to_1() {
        with_test_sess("load", |mut i| {
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(3)) }, "lo");
            assert_eq!(i.last_pos, BytePos(3));
            assert_eq!(i.cur(), Some('a'));
            unsafe { i.reset_to(BytePos(1)) };

            assert_eq!(i.cur(), Some('l'));
            assert_eq!(i.last_pos, BytePos(1));
        });
    }

    #[test]
    fn src_input_smoke_01() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));
            assert_eq!(i.uncons_while(|c| c.is_alphabetic()), "foo");

            // `cur_pos` reports the same value as `last_pos`.
            assert_eq!(i.cur_pos(), BytePos(4));
            assert_eq!(i.last_pos, BytePos(4));
            assert_eq!(i.cur(), Some('/'));

            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(5));
            assert_eq!(i.cur(), Some('d'));

            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(6));
            assert_eq!(i.cur(), None);
        });
    }

    #[test]
    fn src_input_find_01() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));

            assert_eq!(i.find(|c| c == '/'), Some(BytePos(5)));
            assert_eq!(i.last_pos, BytePos(5));
            assert_eq!(i.cur(), Some('d'));
        });
    }

    /// A search without a match must leave the cursor untouched.
    #[test]
    fn src_input_find_no_match() {
        with_test_sess("foo", |mut i| {
            assert_eq!(i.find(|c| c == '/'), None);
            assert_eq!(i.last_pos, BytePos(1));
            assert_eq!(i.cur(), Some('f'));
        });
    }

    /// Multi-byte input: `℘` (U+2118) takes three bytes in UTF-8, so
    /// positions must advance by byte length rather than character count.
    #[test]
    fn src_input_smoke_02() {
        with_test_sess("℘℘/℘℘", |mut i| {
            assert_eq!(i.as_str(), "℘℘/℘℘");
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));
            assert_eq!(i.uncons_while(|c| c != '/'), "℘℘");

            assert_eq!(i.as_str(), "/℘℘");
            assert_eq!(i.last_pos, BytePos(7));
            assert_eq!(i.cur(), Some('/'));
            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(8));

            assert_eq!(i.as_str(), "℘℘");
            assert_eq!(i.uncons_while(|c| c != '/'), "℘℘");
            assert_eq!(i.last_pos, BytePos(14));
            assert_eq!(i.cur(), None);
        });
    }
}