// swc_ecma_fast_parser/lexer/common.rs

//! Common helpers for the lexer
//!
//! This module contains shared functionality used across different lexer
//! modules.

use super::Lexer;
use crate::error::{Error, ErrorKind, Result};

impl Lexer<'_> {
    /// Read a hexadecimal escape sequence of specified length
    pub(super) fn read_hex_escape(&mut self, len: usize) -> Result<u32> {
        let mut result = 0u32;

        for _ in 0..len {
            let digit = match self.cursor.peek() {
                Some(b'0'..=b'9') => self.cursor.peek().unwrap() - b'0',
                Some(b'a'..=b'f') => self.cursor.peek().unwrap() - b'a' + 10,
                Some(b'A'..=b'F') => self.cursor.peek().unwrap() - b'A' + 10,
                _ => {
                    let span = self.span();
                    return Err(Error {
                        kind: ErrorKind::InvalidString {
                            reason: "Invalid hexadecimal escape sequence",
                        },
                        span,
                    });
                }
            };

            result = (result << 4) | (digit as u32);
            self.cursor.advance();
        }

        Ok(result)
    }

    /// Read a Unicode escape sequence
    pub(super) fn read_unicode_escape(&mut self) -> Result<char> {
        match self.cursor.peek() {
            // Unicode code point escape: \u{HHHHHH}
            Some(b'{') => {
                self.cursor.advance();
                let mut codepoint = 0u32;
                let mut digit_count = 0;

                loop {
                    match self.cursor.peek() {
                        Some(b'}') => {
                            self.cursor.advance();
                            break;
                        }
                        Some(b'0'..=b'9') => {
                            let digit = self.cursor.peek().unwrap() - b'0';
                            codepoint = (codepoint << 4) | (digit as u32);
                            self.cursor.advance();
                            digit_count += 1;
                        }
                        Some(b'a'..=b'f') => {
                            let digit = self.cursor.peek().unwrap() - b'a' + 10;
                            codepoint = (codepoint << 4) | (digit as u32);
                            self.cursor.advance();
                            digit_count += 1;
                        }
                        Some(b'A'..=b'F') => {
                            let digit = self.cursor.peek().unwrap() - b'A' + 10;
                            codepoint = (codepoint << 4) | (digit as u32);
                            self.cursor.advance();
                            digit_count += 1;
                        }
                        _ => {
                            let span = self.span();
                            return Err(Error {
                                kind: ErrorKind::InvalidString {
                                    reason: "Invalid Unicode escape sequence",
                                },
                                span,
                            });
                        }
                    }

                    // Too many digits or value is too large
                    if digit_count > 6 || codepoint > 0x10ffff {
                        let span = self.span();
                        return Err(Error {
                            kind: ErrorKind::InvalidString {
                                reason: "Unicode codepoint must be less than or equal to 0x10FFFF",
                            },
                            span,
                        });
                    }
                }

                if digit_count == 0 {
                    let span = self.span();
                    return Err(Error {
                        kind: ErrorKind::InvalidString {
                            reason: "Empty Unicode escape sequence",
                        },
                        span,
                    });
                }

                std::char::from_u32(codepoint).ok_or_else(|| Error {
                    kind: ErrorKind::InvalidString {
                        reason: "Invalid Unicode codepoint",
                    },
                    span: self.span(),
                })
            }

            // Regular 4-digit Unicode escape: \uHHHH
            _ => {
                let codepoint = self.read_hex_escape(4)?;
                std::char::from_u32(codepoint).ok_or_else(|| Error {
                    kind: ErrorKind::InvalidString {
                        reason: "Invalid Unicode codepoint",
                    },
                    span: self.span(),
                })
            }
        }
    }
}