1type ByteHandler = Option<for<'aa> fn(&mut SkipWhitespace<'aa>) -> u32>;
3
4static BYTE_HANDLERS: [ByteHandler; 256] = [
6 ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, NLN, SPC, SPC, NLN, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, SPC, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, ___, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, UNI, ];
24
25const ___: ByteHandler = None;
27
28const NLN: ByteHandler = Some(|skip| {
30 skip.newline = true;
31
32 1
33});
34
35const SPC: ByteHandler = Some(|_| 1);
37
38const UNI: ByteHandler = Some(|skip| {
40 let bytes = skip.input.as_bytes();
42 let i = skip.offset as usize;
43
44 let remaining_bytes = bytes.len() - i;
46 if remaining_bytes < 1 {
47 return 0;
48 }
49
50 let first_byte = unsafe { *bytes.get_unchecked(i) };
52 let char_len = if first_byte < 128 {
53 1
54 } else if first_byte < 224 {
55 if remaining_bytes < 2 {
56 return 0;
57 }
58 2
59 } else if first_byte < 240 {
60 if remaining_bytes < 3 {
61 return 0;
62 }
63 3
64 } else {
65 if remaining_bytes < 4 {
66 return 0;
67 }
68 4
69 };
70
71 if char_len == 3 {
74 if first_byte == 0xe2
76 && unsafe { *bytes.get_unchecked(i + 1) } == 0x80
77 && unsafe { *bytes.get_unchecked(i + 2) } == 0xa8
78 {
79 skip.newline = true;
80 return 3;
81 }
82
83 if first_byte == 0xe2
85 && unsafe { *bytes.get_unchecked(i + 1) } == 0x80
86 && unsafe { *bytes.get_unchecked(i + 2) } == 0xa9
87 {
88 skip.newline = true;
89 return 3;
90 }
91 }
92
93 let s = unsafe {
95 skip.input.get_unchecked(skip.offset as usize..)
97 };
98
99 let c = unsafe {
100 s.chars().next().unwrap_unchecked()
102 };
103
104 match c {
105 '\u{feff}' => {}
107 '\u{2028}' | '\u{2029}' => {
109 skip.newline = true;
110 }
111 _ if c.is_whitespace() => {}
113 _ => return 0,
115 }
116
117 c.len_utf8() as u32
118});
119
120pub(super) struct SkipWhitespace<'a> {
122 pub input: &'a str,
123
124 pub offset: u32,
126
127 pub newline: bool,
129}
130
131impl SkipWhitespace<'_> {
132 #[inline(always)]
133 pub fn scan(&mut self) {
134 let bytes = self.input.as_bytes();
135 let len = bytes.len();
136 let mut pos = self.offset as usize;
137 debug_assert!(pos == 0);
138 debug_assert!(pos <= len);
139
140 if pos == len {
142 return;
143 }
144
145 loop {
146 let mut byte = unsafe { *bytes.get_unchecked(pos) };
148
149 if byte == b' ' {
151 pos += 1;
152 while pos < len && unsafe { *bytes.get_unchecked(pos) } == b' ' {
154 pos += 1;
155 }
156
157 if pos >= len {
159 break;
160 }
161
162 byte = unsafe { *bytes.get_unchecked(pos) };
164 }
165
166 match byte {
168 b'\n' => {
169 pos += 1;
170 self.newline = true;
171
172 if pos >= len {
173 break;
174 }
175 continue;
176 }
177 b'\r' => {
178 pos += 1;
179
180 if pos < len && unsafe { *bytes.get_unchecked(pos) } == b'\n' {
182 pos += 1;
183 self.newline = true;
184 } else {
185 self.newline = true; }
188
189 if pos >= len {
190 break;
191 }
192 continue;
193 }
194 _ => {
196 debug_assert!(byte != b' ' && byte != b'\n' && byte != b'\r');
197 self.offset = pos as u32;
199
200 let handler = unsafe { BYTE_HANDLERS.get_unchecked(byte as usize) };
202
203 match handler {
204 Some(handler) => {
205 let delta = handler(self);
206 if delta == 0 {
207 return;
210 }
211 pos = (self.offset + delta) as usize;
212
213 if pos >= len {
214 break;
215 }
216 }
217 None => {
218 return;
221 }
222 }
223 }
224 }
225 }
226
227 self.offset = pos as u32;
229 }
230}