swc_common/
input.rs

1use std::str;
2
3use debug_unreachable::debug_unreachable;
4
5use crate::syntax_pos::{BytePos, SourceFile};
6
/// Alias of [StringInput].
///
/// NOTE(review): presumably kept so code written against the
/// `SourceFileInput` name keeps compiling — confirm before removing.
pub type SourceFileInput<'a> = StringInput<'a>;
8
9/// Implementation of [Input].
10#[derive(Clone)]
11pub struct StringInput<'a> {
12    last_pos: BytePos,
13    /// Current cursor
14    iter: str::Chars<'a>,
15    orig: &'a str,
16    /// Original start position.
17    orig_start: BytePos,
18    orig_end: BytePos,
19}
20
21impl<'a> StringInput<'a> {
22    /// `start` and `end` can be arbitrary value, but start should be less than
23    /// or equal to end.
24    ///
25    ///
26    /// `swc` get this value from [SourceMap] because code generator depends on
27    /// some methods of [SourceMap].
28    /// If you are not going to use methods from
29    /// [SourceMap], you may use any value.
30    pub fn new(src: &'a str, start: BytePos, end: BytePos) -> Self {
31        assert!(start <= end);
32
33        StringInput {
34            last_pos: start,
35            orig: src,
36            iter: src.chars(),
37            orig_start: start,
38            orig_end: end,
39        }
40    }
41
42    #[inline(always)]
43    pub fn as_str(&self) -> &str {
44        self.iter.as_str()
45    }
46
47    #[inline(always)]
48    /// Compared to [StringInput::slice], this function doesn't set
49    /// `self.last_pos = end` because in most cases this property has been
50    /// satisfied but the compiler cannot optimize it.
51    ///
52    /// Caution: This function should only be used internally and will be
53    /// changed in the future.
54    ///
55    /// # Safety
56    /// - start should be less than or equal to end.
57    /// - start and end should be in the valid range of input.
58    pub unsafe fn slice_to_cur(&mut self, start: BytePos) -> &'a str {
59        let end = self.last_pos;
60
61        debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}");
62        let s = self.orig;
63
64        let start_idx = (start - self.orig_start).0 as usize;
65        let end_idx = (end - self.orig_start).0 as usize;
66
67        debug_assert!(end_idx <= s.len());
68
69        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
70
71        self.iter = unsafe { s.get_unchecked(end_idx..) }.chars();
72
73        ret
74    }
75
76    #[inline]
77    pub fn bump_bytes(&mut self, n: usize) {
78        unsafe {
79            // Safety: We only proceed, not go back.
80            self.reset_to(self.last_pos + BytePos(n as u32));
81        }
82    }
83
84    pub fn start_pos(&self) -> BytePos {
85        self.orig_start
86    }
87
88    #[inline(always)]
89    pub fn end_pos(&self) -> BytePos {
90        self.orig_end
91    }
92}
93
94/// Creates an [Input] from [SourceFile]. This is an alias for
95///
96/// ```ignore
97///    StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
98/// ```
99impl<'a> From<&'a SourceFile> for StringInput<'a> {
100    fn from(fm: &'a SourceFile) -> Self {
101        StringInput::new(&fm.src, fm.start_pos, fm.end_pos)
102    }
103}
104
105impl<'a> Input<'a> for StringInput<'a> {
106    #[inline]
107    fn cur(&self) -> Option<char> {
108        self.iter.clone().next()
109    }
110
111    #[inline]
112    fn peek(&self) -> Option<char> {
113        let mut iter = self.iter.clone();
114        // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster.
115        iter.next();
116        iter.next()
117    }
118
119    #[inline]
120    fn peek_ahead(&self) -> Option<char> {
121        let mut iter = self.iter.clone();
122        // https://github.com/rust-lang/rust/blob/1.86.0/compiler/rustc_lexer/src/cursor.rs#L56 say `next` is faster
123        iter.next();
124        iter.next();
125        iter.next()
126    }
127
128    #[inline]
129    unsafe fn bump(&mut self) {
130        if let Some(c) = self.iter.next() {
131            self.last_pos = self.last_pos + BytePos((c.len_utf8()) as u32);
132        } else {
133            unsafe {
134                debug_unreachable!("bump should not be called when cur() == None");
135            }
136        }
137    }
138
139    #[inline]
140    fn cur_as_ascii(&self) -> Option<u8> {
141        let first_byte = *self.as_str().as_bytes().first()?;
142        if first_byte <= 0x7f {
143            Some(first_byte)
144        } else {
145            None
146        }
147    }
148
149    #[inline]
150    fn is_at_start(&self) -> bool {
151        self.orig_start == self.last_pos
152    }
153
154    /// TODO(kdy1): Remove this?
155    #[inline]
156    fn cur_pos(&self) -> BytePos {
157        self.last_pos
158    }
159
160    #[inline]
161    fn last_pos(&self) -> BytePos {
162        self.last_pos
163    }
164
165    #[inline]
166    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str {
167        debug_assert!(start <= end, "Cannot slice {start:?}..{end:?}");
168        let s = self.orig;
169
170        let start_idx = (start - self.orig_start).0 as usize;
171        let end_idx = (end - self.orig_start).0 as usize;
172
173        debug_assert!(end_idx <= s.len());
174
175        let ret = unsafe { s.get_unchecked(start_idx..end_idx) };
176
177        self.iter = unsafe { s.get_unchecked(end_idx..) }.chars();
178        self.last_pos = end;
179
180        ret
181    }
182
183    #[inline]
184    fn uncons_while<F>(&mut self, mut pred: F) -> &'a str
185    where
186        F: FnMut(char) -> bool,
187    {
188        let last = {
189            let mut last = 0;
190            for c in self.iter.clone() {
191                if pred(c) {
192                    last += c.len_utf8();
193                } else {
194                    break;
195                }
196            }
197            last
198        };
199
200        let s = self.iter.as_str();
201        debug_assert!(last <= s.len());
202        let ret = unsafe { s.get_unchecked(..last) };
203
204        self.last_pos = self.last_pos + BytePos(last as _);
205        self.iter = unsafe { s.get_unchecked(last..) }.chars();
206
207        ret
208    }
209
210    #[inline]
211    unsafe fn reset_to(&mut self, to: BytePos) {
212        if self.last_pos == to {
213            // No need to reset.
214            return;
215        }
216
217        let orig = self.orig;
218        let idx = (to - self.orig_start).0 as usize;
219
220        debug_assert!(idx <= orig.len());
221        let s = unsafe { orig.get_unchecked(idx..) };
222        self.iter = s.chars();
223        self.last_pos = to;
224    }
225
226    #[inline]
227    fn is_byte(&self, c: u8) -> bool {
228        self.iter
229            .as_str()
230            .as_bytes()
231            .first()
232            .map(|b| *b == c)
233            .unwrap_or(false)
234    }
235
236    #[inline]
237    fn is_str(&self, s: &str) -> bool {
238        self.as_str().starts_with(s)
239    }
240
241    #[inline]
242    fn eat_byte(&mut self, c: u8) -> bool {
243        if self.is_byte(c) {
244            self.iter.next();
245            self.last_pos = self.last_pos + BytePos(1_u32);
246            true
247        } else {
248            false
249        }
250    }
251}
252
253pub trait Input<'a>: Clone {
254    fn cur(&self) -> Option<char>;
255    fn peek(&self) -> Option<char>;
256    fn peek_ahead(&self) -> Option<char>;
257
258    /// # Safety
259    ///
260    /// This should be called only when `cur()` returns `Some`. i.e.
261    /// when the Input is not empty.
262    unsafe fn bump(&mut self);
263
264    /// Returns [None] if it's end of input **or** current character is not an
265    /// ascii character.
266    #[inline]
267    fn cur_as_ascii(&self) -> Option<u8> {
268        self.cur().and_then(|i| {
269            if i.is_ascii() {
270                return Some(i as u8);
271            }
272            None
273        })
274    }
275
276    fn is_at_start(&self) -> bool;
277
278    fn cur_pos(&self) -> BytePos;
279
280    fn last_pos(&self) -> BytePos;
281
282    /// # Safety
283    ///
284    /// - start should be less than or equal to end.
285    /// - start and end should be in the valid range of input.
286    unsafe fn slice(&mut self, start: BytePos, end: BytePos) -> &'a str;
287
288    /// Takes items from stream, testing each one with predicate. returns the
289    /// range of items which passed predicate.
290    fn uncons_while<F>(&mut self, f: F) -> &'a str
291    where
292        F: FnMut(char) -> bool;
293
294    /// # Safety
295    ///
296    /// - `to` be in the valid range of input.
297    unsafe fn reset_to(&mut self, to: BytePos);
298
299    /// Implementors can override the method to make it faster.
300    ///
301    /// `c` must be ASCII.
302    #[inline]
303    #[allow(clippy::wrong_self_convention)]
304    fn is_byte(&self, c: u8) -> bool {
305        match self.cur() {
306            Some(ch) => ch == c as char,
307            _ => false,
308        }
309    }
310
311    /// Implementors can override the method to make it faster.
312    ///
313    /// `s` must be ASCII only.
314    fn is_str(&self, s: &str) -> bool;
315
316    /// Implementors can override the method to make it faster.
317    ///
318    /// `c` must be ASCII.
319    #[inline]
320    fn eat_byte(&mut self, c: u8) -> bool {
321        if self.is_byte(c) {
322            unsafe {
323                // Safety: We are sure that the input is not empty
324                self.bump();
325            }
326            true
327        } else {
328            false
329        }
330    }
331}
332
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{sync::Lrc, FileName, FilePathMapping, SourceMap};

    /// Registers `src` as a file in a fresh [SourceMap] and runs `f` with a
    /// [StringInput] over that file.
    fn with_test_sess<F>(src: &'static str, f: F)
    where
        F: FnOnce(StringInput<'_>),
    {
        let cm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
        let fm = cm.new_source_file(FileName::Real("testing".into()).into(), src);

        f((&*fm).into())
    }

    /// `slice` returns the requested range and moves the cursor to its end.
    #[test]
    fn src_input_slice_1() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(2)) }, "f");
            assert_eq!(i.last_pos, BytePos(2));
            assert_eq!(i.cur(), Some('o'));

            assert_eq!(unsafe { i.slice(BytePos(2), BytePos(4)) }, "oo");
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(4)) }, "foo");
            assert_eq!(i.last_pos, BytePos(4));
            assert_eq!(i.cur(), Some('/'));
        });
    }

    /// `reset_to` can move the cursor back to an earlier position.
    #[test]
    fn src_input_reset_to_1() {
        with_test_sess("load", |mut i| {
            assert_eq!(unsafe { i.slice(BytePos(1), BytePos(3)) }, "lo");
            assert_eq!(i.last_pos, BytePos(3));
            assert_eq!(i.cur(), Some('a'));
            unsafe { i.reset_to(BytePos(1)) };

            assert_eq!(i.cur(), Some('l'));
            assert_eq!(i.last_pos, BytePos(1));
        });
    }

    /// `uncons_while` and `bump` keep `last_pos` in sync for ASCII input.
    #[test]
    fn src_input_smoke_01() {
        with_test_sess("foo/d", |mut i| {
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));
            assert_eq!(i.uncons_while(|c| c.is_alphabetic()), "foo");

            // `cur_pos` returns `last_pos`, so both must agree.
            assert_eq!(i.cur_pos(), BytePos(4));
            assert_eq!(i.last_pos, BytePos(4));
            assert_eq!(i.cur(), Some('/'));

            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(5));
            assert_eq!(i.cur(), Some('d'));

            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(6));
            assert_eq!(i.cur(), None);
        });
    }

    /// Multi-byte characters advance `last_pos` by their UTF-8 width.
    #[test]
    fn src_input_smoke_02() {
        // `℘` (U+2118) occupies three bytes in UTF-8.
        with_test_sess("℘℘/℘℘", |mut i| {
            assert_eq!(i.as_str(), "℘℘/℘℘");
            assert_eq!(i.cur_pos(), BytePos(1));
            assert_eq!(i.last_pos, BytePos(1));
            assert_eq!(i.uncons_while(|c| c != '/'), "℘℘");

            assert_eq!(i.as_str(), "/℘℘");
            assert_eq!(i.last_pos, BytePos(7));
            assert_eq!(i.cur(), Some('/'));
            unsafe {
                i.bump();
            }
            assert_eq!(i.last_pos, BytePos(8));

            assert_eq!(i.as_str(), "℘℘");
            assert_eq!(i.uncons_while(|c| c != '/'), "℘℘");
            assert_eq!(i.last_pos, BytePos(14));
            assert_eq!(i.cur(), None);
        });
    }
}