swc_ecma_utils/
unicode.rs

/// Splits a supplementary-plane code point into its UTF-16 surrogate pair.
///
/// On success the result is `Some((high, low))`, where `high` is the leading
/// surrogate (0xD800..=0xDBFF) and `low` is the trailing surrogate
/// (0xDC00..=0xDFFF), both carried as `u32`.
///
/// Returns `None` when `code_point` has no surrogate-pair encoding: either it
/// is below 0x10000 (it is not represented by a surrogate pair) or it is above
/// 0x10FFFF.
#[inline]
pub const fn code_point_to_pair(code_point: u32) -> Option<(u32, u32)> {
    match code_point {
        0x10000..=0x10_ffff => {
            // 20-bit offset past 0x10000: the upper 10 bits select the high
            // surrogate, the lower 10 bits select the low surrogate.
            let offset = code_point - 0x10000;
            Some((0xd800 + (offset >> 10), 0xdc00 + (offset & 0x3ff)))
        }
        _ => None,
    }
}
17
/// Converts UTF-16 surrogate pair to Unicode code point.
/// `https://tc39.es/ecma262/#sec-utf16decodesurrogatepair`
///
/// `high` is the leading surrogate and `low` is the trailing surrogate. For a
/// valid pair the result lies in the range 0x10000..=0x10FFFF.
///
/// # Panics
///
/// Panics if `high` is not in the range 0xD800..=0xDBFF or `low` is not in the
/// range 0xDC00..=0xDFFF.
#[inline]
pub const fn pair_to_code_point(high: u32, low: u32) -> u32 {
    // Enforce the documented contract explicitly. Without these checks,
    // values above the surrogate ranges yield a meaningless result, and values
    // below them only panic when integer overflow checks are enabled.
    // `matches!` is used because range `contains` is not callable in `const fn`.
    assert!(
        matches!(high, 0xd800..=0xdbff),
        "`high` must be a high surrogate (0xD800..=0xDBFF)"
    );
    assert!(
        matches!(low, 0xdc00..=0xdfff),
        "`low` must be a low surrogate (0xDC00..=0xDFFF)"
    );

    // Both subtractions are now guaranteed not to underflow.
    (high - 0xd800) * 0x400 + (low - 0xdc00) + 0x10000
}
29
/// Returns true if `u` is a high surrogate (in the range 0xD800..=0xDBFF).
///
/// This is a `const fn`, so it can also be evaluated in constant contexts.
#[inline]
pub const fn is_high_surrogate(u: u32) -> bool {
    // A range pattern is used because `RangeInclusive::contains` cannot be
    // called from a `const fn`.
    matches!(u, 0xd800..=0xdbff)
}
35
/// Returns true if `u` is a low surrogate (in the range 0xDC00..=0xDFFF).
///
/// This is a `const fn`, so it can also be evaluated in constant contexts.
#[inline]
pub const fn is_low_surrogate(u: u32) -> bool {
    // A range pattern is used because `RangeInclusive::contains` cannot be
    // called from a `const fn`.
    matches!(u, 0xdc00..=0xdfff)
}
41
#[cfg(test)]
mod tests {
    use super::*;

    /// Code points with a surrogate-pair encoding split into the expected
    /// halves; everything else yields `None`.
    #[test]
    fn test_code_point_to_pair() {
        // Poop emoji (💩) - U+1F4A9
        assert_eq!(code_point_to_pair(0x1f4a9), Some((0xd83d, 0xdca9)));

        // First and last code points that have a surrogate-pair encoding
        assert_eq!(code_point_to_pair(0x10000), Some((0xd800, 0xdc00)));
        assert_eq!(code_point_to_pair(0x10_ffff), Some((0xdbff, 0xdfff)));

        // Regular BMP character
        assert_eq!(code_point_to_pair(0x1234), None);
        assert_eq!(code_point_to_pair(0xffff), None);

        // Above the largest value that has a surrogate-pair encoding
        assert_eq!(code_point_to_pair(0x11_0000), None);
    }

    /// Encoding and then decoding must return the original code point. The
    /// assertion is unconditional so the test can never pass vacuously.
    #[test]
    fn test_roundtrip() {
        for original in [0x10000, 0x1f4a9, 0x10_ffff] {
            let Some((high, low)) = code_point_to_pair(original) else {
                unreachable!()
            };
            assert_eq!(pair_to_code_point(high, low), original);
        }
    }

    /// The surrogate predicates accept exactly their documented ranges.
    #[test]
    fn test_surrogate_predicates() {
        assert!(!is_high_surrogate(0xd7ff));
        assert!(is_high_surrogate(0xd800));
        assert!(is_high_surrogate(0xdbff));
        assert!(!is_high_surrogate(0xdc00));

        assert!(!is_low_surrogate(0xdbff));
        assert!(is_low_surrogate(0xdc00));
        assert!(is_low_surrogate(0xdfff));
        assert!(!is_low_surrogate(0xe000));
    }
}