// swc_ecma_fast_parser/lexer/common.rs
//! Common helpers for the lexer
//!
//! This module contains shared functionality used across different lexer
//! modules.
use super::Lexer;
use crate::error::{Error, ErrorKind, Result};
impl Lexer<'_> {
/// Read a hexadecimal escape sequence of specified length
pub(super) fn read_hex_escape(&mut self, len: usize) -> Result<u32> {
let mut result = 0u32;
for _ in 0..len {
let digit = match self.cursor.peek() {
Some(b'0'..=b'9') => self.cursor.peek().unwrap() - b'0',
Some(b'a'..=b'f') => self.cursor.peek().unwrap() - b'a' + 10,
Some(b'A'..=b'F') => self.cursor.peek().unwrap() - b'A' + 10,
_ => {
let span = self.span();
return Err(Error {
kind: ErrorKind::InvalidString {
reason: "Invalid hexadecimal escape sequence",
},
span,
});
}
};
result = (result << 4) | (digit as u32);
self.cursor.advance();
}
Ok(result)
}
/// Read a Unicode escape sequence
pub(super) fn read_unicode_escape(&mut self) -> Result<char> {
match self.cursor.peek() {
// Unicode code point escape: \u{HHHHHH}
Some(b'{') => {
self.cursor.advance();
let mut codepoint = 0u32;
let mut digit_count = 0;
loop {
match self.cursor.peek() {
Some(b'}') => {
self.cursor.advance();
break;
}
Some(b'0'..=b'9') => {
let digit = self.cursor.peek().unwrap() - b'0';
codepoint = (codepoint << 4) | (digit as u32);
self.cursor.advance();
digit_count += 1;
}
Some(b'a'..=b'f') => {
let digit = self.cursor.peek().unwrap() - b'a' + 10;
codepoint = (codepoint << 4) | (digit as u32);
self.cursor.advance();
digit_count += 1;
}
Some(b'A'..=b'F') => {
let digit = self.cursor.peek().unwrap() - b'A' + 10;
codepoint = (codepoint << 4) | (digit as u32);
self.cursor.advance();
digit_count += 1;
}
_ => {
let span = self.span();
return Err(Error {
kind: ErrorKind::InvalidString {
reason: "Invalid Unicode escape sequence",
},
span,
});
}
}
// Too many digits or value is too large
if digit_count > 6 || codepoint > 0x10ffff {
let span = self.span();
return Err(Error {
kind: ErrorKind::InvalidString {
reason: "Unicode codepoint must be less than or equal to 0x10FFFF",
},
span,
});
}
}
if digit_count == 0 {
let span = self.span();
return Err(Error {
kind: ErrorKind::InvalidString {
reason: "Empty Unicode escape sequence",
},
span,
});
}
std::char::from_u32(codepoint).ok_or_else(|| Error {
kind: ErrorKind::InvalidString {
reason: "Invalid Unicode codepoint",
},
span: self.span(),
})
}
// Regular 4-digit Unicode escape: \uHHHH
_ => {
let codepoint = self.read_hex_escape(4)?;
std::char::from_u32(codepoint).ok_or_else(|| Error {
kind: ErrorKind::InvalidString {
reason: "Invalid Unicode codepoint",
},
span: self.span(),
})
}
}
}
}