// swc_ecma_utils/str.rs
/// The NUL character (`U+0000`), exported under the name `EOF`.
///
/// NOTE(review): presumably used as an end-of-input sentinel — confirm against callers.
pub const EOF: char = '\0';

// 11.1 Unicode Format-Control Characters

/// U+200C ZERO WIDTH NON-JOINER, abbreviated in the spec as `<ZWNJ>`.
/// Specially permitted in identifiers.
pub const ZWNJ: char = '\u{200c}';

/// U+200D ZERO WIDTH JOINER, abbreviated in the spec as `<ZWJ>`.
/// Specially permitted in identifiers.
pub const ZWJ: char = '\u{200d}';

/// U+FEFF ZERO WIDTH NO-BREAK SPACE, abbreviated in the spec as `<ZWNBSP>`.
/// Considered a whitespace character in JS.
pub const ZWNBSP: char = '\u{feff}';

// 11.2 White Space

/// U+0009 CHARACTER TABULATION, abbreviated `<TAB>`.
pub const TAB: char = '\u{9}';

/// U+000B VERTICAL TAB, abbreviated `<VT>`.
pub const VT: char = '\u{b}';

/// U+000C FORM FEED, abbreviated `<FF>`.
pub const FF: char = '\u{c}';

/// U+0020 SPACE, abbreviated `<SP>`.
pub const SP: char = '\u{20}';

/// U+00A0 NON-BREAKING SPACE, abbreviated `<NBSP>`.
pub const NBSP: char = '\u{a0}';

/// U+0085 NEXT LINE, abbreviated `<NEL>`.
const NEL: char = '\u{85}';

/// U+1680 OGHAM SPACE MARK.
const OGHAM_SPACE_MARK: char = '\u{1680}';

/// U+2000 EN QUAD; lower bound of the `EN_QUAD..=ZWSP` range matched by
/// `is_irregular_whitespace`.
const EN_QUAD: char = '\u{2000}';

/// U+200B ZERO WIDTH SPACE, abbreviated `<ZWSP>`; upper bound of the
/// `EN_QUAD..=ZWSP` range matched by `is_irregular_whitespace`.
const ZWSP: char = '\u{200b}';

/// U+202F NARROW NO-BREAK SPACE, abbreviated `<NNBSP>`.
const NNBSP: char = '\u{202f}';

/// U+205F MEDIUM MATHEMATICAL SPACE, abbreviated `<MMSP>`.
const MMSP: char = '\u{205f}';

/// U+3000 IDEOGRAPHIC SPACE.
const IDEOGRAPHIC_SPACE: char = '\u{3000}';

51// https://github.com/microsoft/TypeScript/blob/9e20e032effad965567d4a1e1c30d5433b0a3332/src/compiler/scanner.ts#L562-L571
52#[rustfmt::skip]
53pub fn is_irregular_whitespace(c: char) -> bool {
54 matches!(
55 c,
56 | VT
57 | FF
58 | NBSP
59 | NEL
60 | OGHAM_SPACE_MARK
61 | EN_QUAD..=ZWSP
62 | NNBSP
63 | MMSP
64 | IDEOGRAPHIC_SPACE
65 | ZWNBSP
66 )
67}
68
69// https://github.com/microsoft/TypeScript/blob/9e20e032effad965567d4a1e1c30d5433b0a3332/src/compiler/scanner.ts#L557-L572
70pub fn is_white_space_single_line(c: char) -> bool {
71 // Note: nextLine is in the Zs space, and should be considered to be a
72 // whitespace.
73 // It is explicitly not a line-break as it isn't in the exact set specified by
74 // EcmaScript.
75 matches!(c, SP | TAB) || is_irregular_whitespace(c)
76}
77
// 11.3 Line Terminators

/// U+000A LINE FEED, abbreviated in the spec as `<LF>`.
pub const LF: char = '\u{a}';

/// U+000D CARRIAGE RETURN, abbreviated in the spec as `<CR>`.
pub const CR: char = '\u{d}';

/// U+2028 LINE SEPARATOR, abbreviated in the spec as `<LS>`.
pub const LS: char = '\u{2028}';

/// U+2029 PARAGRAPH SEPARATOR, abbreviated in the spec as `<PS>`.
pub const PS: char = '\u{2029}';

92pub fn is_regular_line_terminator(c: char) -> bool {
93 matches!(c, LF | CR)
94}
95
96pub fn is_irregular_line_terminator(c: char) -> bool {
97 matches!(c, LS | PS)
98}
99
100// https://github.com/microsoft/TypeScript/blob/9e20e032effad965567d4a1e1c30d5433b0a3332/src/compiler/scanner.ts#L574-L590
101pub fn is_line_terminator(c: char) -> bool {
102 is_regular_line_terminator(c) || is_irregular_line_terminator(c)
103}