1use std::{cell::RefCell, mem, rc::Rc};
2
3use node::*;
4use open_elements_stack::*;
5use swc_common::{Span, DUMMY_SP};
6use swc_xml_ast::*;
7
8use self::input::{Buffer, ParserInput};
9use crate::error::{Error, ErrorKind};
10
11#[macro_use]
12mod macros;
13pub mod input;
14mod node;
15mod open_elements_stack;
16
/// Result alias used by the parser: the success value `T`, or the crate's
/// [`Error`] on failure.
pub type PResult<T> = Result<T, Error>;
18
/// Options accepted by [`Parser::new`].
///
/// The struct currently has no fields, so every value is equal to
/// `ParserConfig::default()`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ParserConfig {}
21
/// Tree-construction state of the parser; selects which branch of
/// `Parser::process_token` handles the next token.
#[derive(Debug, Default)]
pub enum Phase {
    /// Initial state. A start tag seen here becomes a child of the document
    /// and switches to [`Phase::MainPhase`]; an empty tag switches straight to
    /// [`Phase::EndPhase`].
    #[default]
    StartPhase,
    /// Active while at least one element is open. Left for
    /// [`Phase::EndPhase`] once an end tag empties the stack of open elements.
    MainPhase,
    /// State after the top-level element. An EOF token handled here stops the
    /// parser.
    EndPhase,
}
29
/// XML tree builder: pulls tokens from a [`ParserInput`] and assembles them
/// into a [`Document`].
pub struct Parser<I>
where
    I: ParserInput,
{
    // Stored but never read in this file (`ParserConfig` has no fields yet),
    // hence the `dead_code` allowance.
    #[allow(dead_code)]
    config: ParserConfig,
    /// Token source, wrapped in a [`Buffer`].
    input: Buffer<I>,
    /// Set once EOF has been handled in [`Phase::EndPhase`]; ends the `run` loop.
    stopped: bool,
    /// Root node of the tree under construction; `Some` only while
    /// `parse_document` is running.
    document: Option<RcNode>,
    /// Elements that have been opened and not yet closed; the last item is
    /// the current element.
    open_elements_stack: OpenElementsStack,
    /// Recoverable errors collected so far; drained by `take_errors`.
    errors: Vec<Error>,
    /// Current tree-construction phase.
    phase: Phase,
}
43
44impl<I> Parser<I>
45where
46 I: ParserInput,
47{
48 pub fn new(input: I, config: ParserConfig) -> Self {
49 Parser {
50 config,
51 input: Buffer::new(input),
52 stopped: false,
53 document: None,
54 open_elements_stack: OpenElementsStack::new(),
55 errors: Default::default(),
56 phase: Phase::default(),
57 }
58 }
59
60 pub fn dump_cur(&mut self) -> String {
61 format!("{:?}", self.input.cur())
62 }
63
    /// Returns every error recorded so far and leaves the parser's error list
    /// empty.
    pub fn take_errors(&mut self) -> Vec<Error> {
        mem::take(&mut self.errors)
    }
67
68 pub fn parse_document(&mut self) -> PResult<Document> {
69 let start = self.input.cur_span()?;
70
71 self.document = Some(self.create_document());
72
73 self.run()?;
74
75 let document = &mut self.document.take().unwrap();
76 let nodes = document.children.take();
77 let mut children = Vec::with_capacity(nodes.len());
78
79 for node in nodes {
80 children.push(self.node_to_child(node));
81 }
82
83 let last = self.input.last_pos()?;
84
85 Ok(Document {
86 span: Span::new(start.lo(), last),
87 children,
88 })
89 }
90
    /// Creates the root node of the internal tree: a `Data::Document` node
    /// with a dummy span.
    fn create_document(&self) -> RcNode {
        Node::new(Data::Document, DUMMY_SP)
    }
94
95 #[allow(clippy::only_used_in_recursion)]
96 fn get_deep_end_span(&mut self, children: &[Child]) -> Option<Span> {
97 match children.last() {
98 Some(Child::DocumentType(DocumentType { span, .. })) => Some(*span),
99 Some(Child::Element(Element { span, children, .. })) => {
100 if span.is_dummy() {
101 return self.get_deep_end_span(children);
102 }
103
104 Some(*span)
105 }
106 Some(Child::Comment(Comment { span, .. })) => Some(*span),
107 Some(Child::Text(Text { span, .. })) => Some(*span),
108 _ => None,
109 }
110 }
111
112 fn node_to_child(&mut self, node: RcNode) -> Child {
113 let start_span = node.start_span.take();
114
115 match node.data.clone() {
116 Data::DocumentType {
117 name,
118 public_id,
119 system_id,
120 raw,
121 } => Child::DocumentType(DocumentType {
122 span: start_span,
123 name,
124 public_id,
125 system_id,
126 raw,
127 }),
128 Data::Element {
129 tag_name,
130 attributes,
131 } => {
132 let nodes = node.children.take();
133 let mut new_children = Vec::with_capacity(nodes.len());
134
135 for node in nodes {
136 new_children.push(self.node_to_child(node));
137 }
138
139 let attributes = attributes.take();
140
141 let span = if start_span.is_dummy() {
142 start_span
143 } else {
144 let end_span = match node.end_span.take() {
145 Some(end_span) if !end_span.is_dummy() => end_span,
146 _ => match self.get_deep_end_span(&new_children) {
147 Some(end_span) => end_span,
148 _ => start_span,
149 },
150 };
151
152 Span::new(start_span.lo(), end_span.hi())
153 };
154
155 Child::Element(Element {
156 span,
157 tag_name,
158 attributes,
159 children: new_children,
160 })
161 }
162 Data::Text { data, raw } => {
163 let span = if let Some(end_span) = node.end_span.take() {
164 swc_common::Span::new(start_span.lo(), end_span.hi())
165 } else {
166 start_span
167 };
168
169 Child::Text(Text {
170 span,
171 data: data.take().into(),
172 raw: Some(raw.take().into()),
173 })
174 }
175 Data::Comment { data, raw } => Child::Comment(Comment {
176 span: start_span,
177 data,
178 raw,
179 }),
180 Data::ProcessingInstruction { target, data } => {
181 Child::ProcessingInstruction(ProcessingInstruction {
182 span: start_span,
183 target,
184 data,
185 })
186 }
187 Data::CdataSection { data, raw } => Child::CdataSection(CdataSection {
188 span: start_span,
189 data,
190 raw,
191 }),
192 _ => {
193 unreachable!();
194 }
195 }
196 }
197
198 fn run(&mut self) -> PResult<()> {
199 while !self.stopped {
200 let mut token_and_info = match self.input.cur()? {
201 Some(_) => {
202 let span = self.input.cur_span()?;
203 let token = bump!(self);
204
205 TokenAndInfo {
206 span: span!(self, span.lo()),
207 acknowledged: false,
208 token,
209 }
210 }
211 None => {
212 let start_pos = self.input.start_pos()?;
213 let last_pos = self.input.last_pos()?;
214
215 TokenAndInfo {
216 span: Span::new(start_pos, last_pos),
217 acknowledged: false,
218 token: Token::Eof,
219 }
220 }
221 };
222
223 for error in self.input.take_errors() {
225 let (span, kind) = *error.into_inner();
226
227 self.errors.push(Error::new(span, kind));
228 }
229
230 self.tree_construction_dispatcher(&mut token_and_info)?;
231 }
232
233 Ok(())
234 }
235
    /// Entry point of tree construction for one token: processes it using the
    /// parser's current phase (no phase override).
    fn tree_construction_dispatcher(&mut self, token_and_info: &mut TokenAndInfo) -> PResult<()> {
        self.process_token(token_and_info, None)
    }
239
    /// Applies one token to the tree according to the tree-construction phase.
    ///
    /// `phase` overrides the phase used for this single call; with `None` the
    /// parser's own `self.phase` is used. The override is how EOF handling in
    /// the start and main phases re-dispatches the token to the end phase.
    ///
    /// Unexpected tokens are recorded in `self.errors` and parsing continues;
    /// they are not returned as `Err`.
    ///
    /// # Panics
    ///
    /// Panics if no document node is set, or if the main phase is entered
    /// with no open element.
    fn process_token(
        &mut self,
        token_and_info: &mut TokenAndInfo,
        phase: Option<Phase>,
    ) -> PResult<()> {
        let phase = match &phase {
            Some(phase) => phase,
            _ => &self.phase,
        };

        match phase {
            // No element has been opened yet.
            Phase::StartPhase => match &token_and_info.token {
                Token::StartTag { .. } => {
                    let element = self.create_element_for_token(token_and_info.clone());

                    // The element becomes a child of the document and the
                    // current element.
                    self.append_node(self.document.as_ref().unwrap(), element.clone());
                    self.open_elements_stack.items.push(element);
                    self.phase = Phase::MainPhase;
                }
                Token::EmptyTag { .. } => {
                    let element = self.create_element_for_token(token_and_info.clone());

                    // An empty tag is never pushed on the stack, so the main
                    // phase is skipped entirely.
                    self.append_node(self.document.as_ref().unwrap(), element);
                    self.phase = Phase::EndPhase;
                }
                Token::Comment { .. } => {
                    self.append_comment_to_doc(token_and_info)?;
                }
                Token::ProcessingInstruction { .. } => {
                    self.append_processing_instruction_to_doc(token_and_info)?;
                }
                Token::Cdata { .. } => {
                    // Reported as an error, but the section is still kept in
                    // the document.
                    self.errors.push(Error::new(
                        token_and_info.span,
                        ErrorKind::UnexpectedTokenInStartPhase,
                    ));

                    self.append_cdata_to_doc(token_and_info)?;
                }
                Token::Character { value, .. } => {
                    // Whitespace is silently dropped; any other character is
                    // reported and dropped.
                    if !is_whitespace(*value) {
                        self.errors.push(Error::new(
                            token_and_info.span,
                            ErrorKind::UnexpectedCharacter,
                        ));
                    }
                }
                Token::Eof => {
                    self.errors.push(Error::new(
                        token_and_info.span,
                        ErrorKind::UnexpectedEofInStartPhase,
                    ));

                    // Let the end phase handle EOF so the parser stops.
                    self.process_token(token_and_info, Some(Phase::EndPhase))?;
                }
                Token::Doctype { .. } => {
                    let document_type = self.create_document_type_for_token(token_and_info);

                    self.append_node(self.document.as_ref().unwrap(), document_type);
                }
                _ => {
                    self.errors.push(Error::new(
                        token_and_info.span,
                        ErrorKind::UnexpectedTokenInStartPhase,
                    ));
                }
            },
            // At least one element is open; content goes into the current
            // element.
            Phase::MainPhase => match &token_and_info.token {
                Token::Character { .. } => {
                    self.append_character_to_current_element(token_and_info)?;
                }
                Token::StartTag { .. } => {
                    let element = self.create_element_for_token(token_and_info.clone());

                    self.append_node(self.get_current_element(), element.clone());
                    self.open_elements_stack.items.push(element);
                }
                Token::EmptyTag { .. } => {
                    let element = self.create_element_for_token(token_and_info.clone());

                    self.append_node(self.get_current_element(), element);
                }
                Token::EndTag { tag_name, .. } => {
                    // An end tag that does not match the current element is
                    // reported, but may still close an ancestor below.
                    if get_tag_name!(self.get_current_element()) != tag_name {
                        self.errors.push(Error::new(
                            token_and_info.span,
                            ErrorKind::OpeningAndEndingTagMismatch,
                        ));
                    }

                    let is_closed = self
                        .open_elements_stack
                        .items
                        .iter()
                        .rev()
                        .any(|node| get_tag_name!(node) == tag_name);

                    // If no open element has this name, the end tag is
                    // ignored and the stack is left untouched.
                    if is_closed {
                        // NOTE(review): presumably pops up to and including
                        // the nearest element with this name and returns it;
                        // confirm in `OpenElementsStack`.
                        let popped = self
                            .open_elements_stack
                            .pop_until_tag_name_popped(&[tag_name]);

                        self.update_end_tag_span(popped.as_ref(), token_and_info.span);
                    }

                    if self.open_elements_stack.items.is_empty() {
                        self.phase = Phase::EndPhase;
                    }
                }
                Token::Comment { .. } => {
                    let comment = self.create_comment(token_and_info);

                    self.append_node(self.get_current_element(), comment);
                }
                Token::ProcessingInstruction { .. } => {
                    let processing_instruction = self.create_processing_instruction(token_and_info);

                    self.append_node(self.get_current_element(), processing_instruction);
                }
                Token::Cdata { .. } => {
                    let cdata = self.create_cdata_section(token_and_info);

                    self.append_node(self.get_current_element(), cdata);
                }
                Token::Eof => {
                    self.errors.push(Error::new(
                        token_and_info.span,
                        ErrorKind::UnexpectedEofInMainPhase,
                    ));

                    // Let the end phase handle EOF so the parser stops.
                    self.process_token(token_and_info, Some(Phase::EndPhase))?;
                }
                _ => {
                    self.errors.push(Error::new(
                        token_and_info.span,
                        ErrorKind::UnexpectedTokenInMainPhase,
                    ));
                }
            },
            // Every element has been closed; only document-level nodes are
            // accepted.
            Phase::EndPhase => match &token_and_info.token {
                Token::Comment { .. } => {
                    self.append_comment_to_doc(token_and_info)?;
                }
                Token::ProcessingInstruction { .. } => {
                    self.append_processing_instruction_to_doc(token_and_info)?;
                }
                Token::Cdata { .. } => {
                    // Reported as an error, but the section is still kept in
                    // the document.
                    self.errors.push(Error::new(
                        token_and_info.span,
                        ErrorKind::UnexpectedTokenInEndPhase,
                    ));

                    self.append_cdata_to_doc(token_and_info)?;
                }
                Token::Character { value, .. } => {
                    // Whitespace is silently dropped; any other character is
                    // reported and dropped.
                    if !is_whitespace(*value) {
                        self.errors.push(Error::new(
                            token_and_info.span,
                            ErrorKind::UnexpectedCharacter,
                        ));
                    }
                }
                Token::Eof => {
                    // Ends the loop in `run`.
                    self.stopped = true;
                }
                _ => {
                    self.errors.push(Error::new(
                        token_and_info.span,
                        ErrorKind::UnexpectedTokenInEndPhase,
                    ));
                }
            },
        }

        Ok(())
    }
416
417 fn append_node(&self, parent: &RcNode, child: RcNode) {
418 let previous_parent = child.parent.replace(Some(Rc::downgrade(parent)));
419
420 assert!(previous_parent.is_none());
422
423 parent.children.borrow_mut().push(child);
424 }
425
426 fn get_current_element(&self) -> &RcNode {
427 self.open_elements_stack
428 .items
429 .last()
430 .expect("no current element")
431 }
432
433 fn create_document_type_for_token(&self, token_and_info: &mut TokenAndInfo) -> RcNode {
434 let (name, public_id, system_id, raw) = match &token_and_info.token {
435 Token::Doctype {
436 name,
437 public_id,
438 system_id,
439 raw,
440 } => (
441 name.clone(),
442 public_id.clone(),
443 system_id.clone(),
444 raw.clone(),
445 ),
446 _ => {
447 unreachable!()
448 }
449 };
450
451 Node::new(
452 Data::DocumentType {
453 name,
454 public_id,
455 system_id,
456 raw,
457 },
458 token_and_info.span,
459 )
460 }
461
462 fn create_element_for_token(&self, token_and_info: TokenAndInfo) -> RcNode {
463 let element = match token_and_info.token {
464 Token::StartTag {
465 tag_name,
466 attributes,
467 ..
468 }
469 | Token::EndTag {
470 tag_name,
471 attributes,
472 ..
473 }
474 | Token::EmptyTag {
475 tag_name,
476 attributes,
477 ..
478 } => {
479 let attributes = attributes
480 .into_iter()
481 .map(|attribute_token| Attribute {
482 span: attribute_token.span,
483 namespace: None,
484 prefix: None,
485 name: attribute_token.name,
486 raw_name: attribute_token.raw_name,
487 value: attribute_token.value,
488 raw_value: attribute_token.raw_value,
489 })
490 .collect();
491
492 Data::Element {
493 tag_name,
494 attributes: RefCell::new(attributes),
495 }
496 }
497 _ => {
498 unreachable!();
499 }
500 };
501
502 Node::new(element, token_and_info.span)
503 }
504
505 fn append_character_to_current_element(
506 &mut self,
507 token_and_info: &mut TokenAndInfo,
508 ) -> PResult<()> {
509 if let Some(last) = self.open_elements_stack.items.last() {
510 let children = last.children.borrow();
511
512 if let Some(last) = children.last() {
513 if let Data::Text {
514 data,
515 raw: raw_data,
516 } = &last.data
517 {
518 match &token_and_info.token {
519 Token::Character {
520 value: c,
521 raw: raw_c,
522 } => {
523 data.borrow_mut().push(*c);
524
525 if let Some(raw_c) = raw_c {
526 raw_data.borrow_mut().push_str(raw_c);
527 }
528 }
529 _ => {
530 unreachable!();
531 }
532 }
533
534 let mut span = last.end_span.borrow_mut();
535
536 *span = Some(token_and_info.span);
537
538 return Ok(());
539 }
540 }
541 }
542
543 let (data, raw) = match &token_and_info.token {
544 Token::Character {
545 value: c,
546 raw: raw_c,
547 } => {
548 let mut data = String::with_capacity(255);
549
550 data.push(*c);
551
552 let mut raw = String::with_capacity(255);
553
554 if let Some(raw_c) = raw_c {
555 raw.push_str(raw_c);
556 }
557
558 (RefCell::new(data), RefCell::new(raw))
559 }
560 _ => {
561 unreachable!()
562 }
563 };
564
565 let text = Node::new(Data::Text { data, raw }, token_and_info.span);
566
567 self.append_node(self.get_current_element(), text);
568
569 Ok(())
570 }
571
572 fn create_comment(&self, token_and_info: &mut TokenAndInfo) -> RcNode {
573 let (data, raw) = match &token_and_info.token {
574 Token::Comment { data, raw } => (data.clone(), Some(raw.clone())),
575 _ => {
576 unreachable!()
577 }
578 };
579
580 Node::new(Data::Comment { data, raw }, token_and_info.span)
581 }
582
583 fn append_comment_to_doc(&mut self, token_and_info: &mut TokenAndInfo) -> PResult<()> {
584 let comment = self.create_comment(token_and_info);
585
586 self.append_node(self.document.as_ref().unwrap(), comment);
587
588 Ok(())
589 }
590
591 fn create_processing_instruction(&self, token_and_info: &mut TokenAndInfo) -> RcNode {
592 let (target, data) = match &token_and_info.token {
593 Token::ProcessingInstruction { target, data } => (target.clone(), data.clone()),
594 _ => {
595 unreachable!()
596 }
597 };
598
599 Node::new(
600 Data::ProcessingInstruction { target, data },
601 token_and_info.span,
602 )
603 }
604
605 fn append_processing_instruction_to_doc(
606 &mut self,
607 token_and_info: &mut TokenAndInfo,
608 ) -> PResult<()> {
609 let child = self.create_processing_instruction(token_and_info);
610
611 self.append_node(self.document.as_ref().unwrap(), child);
612
613 Ok(())
614 }
615
616 fn create_cdata_section(&self, token_and_info: &mut TokenAndInfo) -> RcNode {
617 let (data, raw) = match &token_and_info.token {
618 Token::Cdata { data, raw } => (data.clone(), Some(raw.clone())),
619 _ => {
620 unreachable!()
621 }
622 };
623
624 Node::new(Data::CdataSection { data, raw }, token_and_info.span)
625 }
626
627 fn append_cdata_to_doc(&mut self, token_and_info: &mut TokenAndInfo) -> PResult<()> {
628 let child = self.create_cdata_section(token_and_info);
629
630 self.append_node(self.document.as_ref().unwrap(), child);
631
632 Ok(())
633 }
634
635 fn update_end_tag_span(&self, node: Option<&RcNode>, span: Span) {
636 if let Some(node) = node {
637 if node.start_span.borrow().is_dummy() {
638 return;
639 }
640
641 let mut end_tag_span = node.end_span.borrow_mut();
642
643 *end_tag_span = Some(span);
644 }
645 }
646}
647
/// Returns `true` for the characters the parser treats as whitespace: tab,
/// carriage return, line feed, form feed and space.
fn is_whitespace(c: char) -> bool {
    const WHITESPACE: [char; 5] = ['\t', '\r', '\n', '\x0C', ' '];

    WHITESPACE.contains(&c)
}