1use phf::{phf_set, Set};
2
3pub fn is_valid_unicode_property(name: &str, value: &str) -> bool {
5 if matches!(name, "General_Category" | "gc") {
6 return GC_PROPERTY_VALUES.contains(value);
7 }
8 if matches!(name, "Script" | "sc") {
9 return SC_PROPERTY_VALUES.contains(value);
10 }
11 if matches!(name, "Script_Extensions" | "scx") {
12 return SC_PROPERTY_VALUES.contains(value) || SCX_PROPERTY_VALUES.contains(value);
13 }
14 false
15}
16
17pub fn is_valid_lone_unicode_property(name_or_value: &str) -> bool {
18 BINARY_UNICODE_PROPERTIES.contains(name_or_value)
19}
20pub fn is_valid_lone_unicode_property_of_strings(name_or_value: &str) -> bool {
22 BINARY_UNICODE_PROPERTIES_OF_STRINGS.contains(name_or_value)
23}
24
25static GC_PROPERTY_VALUES: Set<&'static str> = phf_set! {
29 "C", "Other",
30 "Cc", "Control", "cntrl",
31 "Cf", "Format",
32 "Cn", "Unassigned",
33 "Co", "Private_Use",
34 "Cs", "Surrogate",
35 "L", "Letter",
36 "LC", "Cased_Letter",
37 "Ll", "Lowercase_Letter",
38 "Lm", "Modifier_Letter",
39 "Lo", "Other_Letter",
40 "Lt", "Titlecase_Letter",
41 "Lu", "Uppercase_Letter",
42 "M", "Mark", "Combining_Mark",
43 "Mc", "Spacing_Mark",
44 "Me", "Enclosing_Mark",
45 "Mn", "Nonspacing_Mark",
46 "N", "Number",
47 "Nd", "Decimal_Number", "digit",
48 "Nl", "Letter_Number",
49 "No", "Other_Number",
50 "P", "Punctuation", "punct",
51 "Pc", "Connector_Punctuation",
52 "Pd", "Dash_Punctuation",
53 "Pe", "Close_Punctuation",
54 "Pf", "Final_Punctuation",
55 "Pi", "Initial_Punctuation",
56 "Po", "Other_Punctuation",
57 "Ps", "Open_Punctuation",
58 "S", "Symbol",
59 "Sc", "Currency_Symbol",
60 "Sk", "Modifier_Symbol",
61 "Sm", "Math_Symbol",
62 "So", "Other_Symbol",
63 "Z", "Separator",
64 "Zl", "Line_Separator",
65 "Zp", "Paragraph_Separator",
66 "Zs", "Space_Separator"
67};
68
69static SC_PROPERTY_VALUES: Set<&'static str> = phf_set! {
70 "Adlm", "Adlam",
71 "Aghb", "Caucasian_Albanian",
72 "Ahom",
73 "Arab", "Arabic",
74 "Armi", "Imperial_Aramaic",
75 "Armn", "Armenian",
76 "Avst", "Avestan",
77 "Bali", "Balinese",
78 "Bamu", "Bamum",
79 "Bass", "Bassa_Vah",
80 "Batk", "Batak",
81 "Beng", "Bengali",
82 "Bhks", "Bhaiksuki",
83 "Bopo", "Bopomofo",
84 "Brah", "Brahmi",
85 "Brai", "Braille",
86 "Bugi", "Buginese",
87 "Buhd", "Buhid",
88 "Cakm", "Chakma",
89 "Cans", "Canadian_Aboriginal",
90 "Cari", "Carian",
91 "Cham",
92 "Cher", "Cherokee",
93 "Chrs", "Chorasmian",
94 "Copt", "Coptic", "Qaac",
95 "Cpmn", "Cypro_Minoan",
96 "Cprt", "Cypriot",
97 "Cyrl", "Cyrillic",
98 "Deva", "Devanagari",
99 "Diak", "Dives_Akuru",
100 "Dogr", "Dogra",
101 "Dsrt", "Deseret",
102 "Dupl", "Duployan",
103 "Egyp", "Egyptian_Hieroglyphs",
104 "Elba", "Elbasan",
105 "Elym", "Elymaic",
106 "Ethi", "Ethiopic",
107 "Gara", "Garay",
108 "Geor", "Georgian",
109 "Glag", "Glagolitic",
110 "Gong", "Gunjala_Gondi",
111 "Gonm", "Masaram_Gondi",
112 "Goth", "Gothic",
113 "Gran", "Grantha",
114 "Grek", "Greek",
115 "Gujr", "Gujarati",
116 "Gukh", "Gurung_Khema",
117 "Guru", "Gurmukhi",
118 "Hang", "Hangul",
119 "Hani", "Han",
120 "Hano", "Hanunoo",
121 "Hatr", "Hatran",
122 "Hebr", "Hebrew",
123 "Hira", "Hiragana",
124 "Hluw", "Anatolian_Hieroglyphs",
125 "Hmng", "Pahawh_Hmong",
126 "Hmnp", "Nyiakeng_Puachue_Hmong",
127 "Hrkt", "Katakana_Or_Hiragana",
128 "Hung", "Old_Hungarian",
129 "Ital", "Old_Italic",
130 "Java", "Javanese",
131 "Kali", "Kayah_Li",
132 "Kana", "Katakana",
133 "Kawi",
134 "Khar", "Kharoshthi",
135 "Khmr", "Khmer",
136 "Khoj", "Khojki",
137 "Kits", "Khitan_Small_Script",
138 "Knda", "Kannada",
139 "Krai", "Kirat_Rai",
140 "Kthi", "Kaithi",
141 "Lana", "Tai_Tham",
142 "Laoo", "Lao",
143 "Latn", "Latin",
144 "Lepc", "Lepcha",
145 "Limb", "Limbu",
146 "Lina", "Linear_A",
147 "Linb", "Linear_B",
148 "Lisu",
149 "Lyci", "Lycian",
150 "Lydi", "Lydian",
151 "Mahj", "Mahajani",
152 "Maka", "Makasar",
153 "Mand", "Mandaic",
154 "Mani", "Manichaean",
155 "Marc", "Marchen",
156 "Medf", "Medefaidrin",
157 "Mend", "Mende_Kikakui",
158 "Merc", "Meroitic_Cursive",
159 "Mero", "Meroitic_Hieroglyphs",
160 "Mlym", "Malayalam",
161 "Modi",
162 "Mong", "Mongolian",
163 "Mroo", "Mro",
164 "Mtei", "Meetei_Mayek",
165 "Mult", "Multani",
166 "Mymr", "Myanmar",
167 "Nagm", "Nag_Mundari",
168 "Nand", "Nandinagari",
169 "Narb", "Old_North_Arabian",
170 "Nbat", "Nabataean",
171 "Newa",
172 "Nkoo", "Nko",
173 "Nshu", "Nushu",
174 "Ogam", "Ogham",
175 "Olck", "Ol_Chiki",
176 "Onao", "Ol_Onal",
177 "Orkh", "Old_Turkic",
178 "Orya", "Oriya",
179 "Osge", "Osage",
180 "Osma", "Osmanya",
181 "Ougr", "Old_Uyghur",
182 "Palm", "Palmyrene",
183 "Pauc", "Pau_Cin_Hau",
184 "Perm", "Old_Permic",
185 "Phag", "Phags_Pa",
186 "Phli", "Inscriptional_Pahlavi",
187 "Phlp", "Psalter_Pahlavi",
188 "Phnx", "Phoenician",
189 "Plrd", "Miao",
190 "Prti", "Inscriptional_Parthian",
191 "Rjng", "Rejang",
192 "Rohg", "Hanifi_Rohingya",
193 "Runr", "Runic",
194 "Samr", "Samaritan",
195 "Sarb", "Old_South_Arabian",
196 "Saur", "Saurashtra",
197 "Sgnw", "SignWriting",
198 "Shaw", "Shavian",
199 "Shrd", "Sharada",
200 "Sidd", "Siddham",
201 "Sind", "Khudawadi",
202 "Sinh", "Sinhala",
203 "Sogd", "Sogdian",
204 "Sogo", "Old_Sogdian",
205 "Sora", "Sora_Sompeng",
206 "Soyo", "Soyombo",
207 "Sund", "Sundanese",
208 "Sunu", "Sunuwar",
209 "Sylo", "Syloti_Nagri",
210 "Syrc", "Syriac",
211 "Tagb", "Tagbanwa",
212 "Takr", "Takri",
213 "Tale", "Tai_Le",
214 "Talu", "New_Tai_Lue",
215 "Taml", "Tamil",
216 "Tang", "Tangut",
217 "Tavt", "Tai_Viet",
218 "Telu", "Telugu",
219 "Tfng", "Tifinagh",
220 "Tglg", "Tagalog",
221 "Thaa", "Thaana",
222 "Thai",
223 "Tibt", "Tibetan",
224 "Tirh", "Tirhuta",
225 "Tnsa", "Tangsa",
226 "Todr", "Todhri",
227 "Toto",
228 "Tutg", "Tulu_Tigalari",
229 "Ugar", "Ugaritic",
230 "Vaii", "Vai",
231 "Vith", "Vithkuqi",
232 "Wara", "Warang_Citi",
233 "Wcho", "Wancho",
234 "Xpeo", "Old_Persian",
235 "Xsux", "Cuneiform",
236 "Yezi", "Yezidi",
237 "Yiii", "Yi",
238 "Zanb", "Zanabazar_Square",
239 "Zinh", "Inherited", "Qaai",
240 "Zyyy", "Common",
241 "Zzzz", "Unknown",
242};
243
244static SCX_PROPERTY_VALUES: Set<&'static str> = phf_set! {
245 };
247
248static BINARY_UNICODE_PROPERTIES: Set<&'static str> = phf_set! {
251 "ASCII",
252 "ASCII_Hex_Digit",
253 "AHex",
254 "Alphabetic",
255 "Alpha",
256 "Any",
257 "Assigned",
258 "Bidi_Control",
259 "Bidi_C",
260 "Bidi_Mirrored",
261 "Bidi_M",
262 "Case_Ignorable",
263 "CI",
264 "Cased",
265 "Changes_When_Casefolded",
266 "CWCF",
267 "Changes_When_Casemapped",
268 "CWCM",
269 "Changes_When_Lowercased",
270 "CWL",
271 "Changes_When_NFKC_Casefolded",
272 "CWKCF",
273 "Changes_When_Titlecased",
274 "CWT",
275 "Changes_When_Uppercased",
276 "CWU",
277 "Dash",
278 "Default_Ignorable_Code_Point",
279 "DI",
280 "Deprecated",
281 "Dep",
282 "Diacritic",
283 "Dia",
284 "Emoji",
285 "Emoji_Component",
286 "EComp",
287 "Emoji_Modifier",
288 "EMod",
289 "Emoji_Modifier_Base",
290 "EBase",
291 "Emoji_Presentation",
292 "EPres",
293 "Extended_Pictographic",
294 "ExtPict",
295 "Extender",
296 "Ext",
297 "Grapheme_Base",
298 "Gr_Base",
299 "Grapheme_Extend",
300 "Gr_Ext",
301 "Hex_Digit",
302 "Hex",
303 "IDS_Binary_Operator",
304 "IDSB",
305 "IDS_Trinary_Operator",
306 "IDST",
307 "ID_Continue",
308 "IDC",
309 "ID_Start",
310 "IDS",
311 "Ideographic",
312 "Ideo",
313 "Join_Control",
314 "Join_C",
315 "Logical_Order_Exception",
316 "LOE",
317 "Lowercase",
318 "Lower",
319 "Math",
320 "Noncharacter_Code_Point",
321 "NChar",
322 "Pattern_Syntax",
323 "Pat_Syn",
324 "Pattern_White_Space",
325 "Pat_WS",
326 "Quotation_Mark",
327 "QMark",
328 "Radical",
329 "Regional_Indicator",
330 "RI",
331 "Sentence_Terminal",
332 "STerm",
333 "Soft_Dotted",
334 "SD",
335 "Terminal_Punctuation",
336 "Term",
337 "Unified_Ideograph",
338 "UIdeo",
339 "Uppercase",
340 "Upper",
341 "Variation_Selector",
342 "VS",
343 "White_Space",
344 "space",
345 "XID_Continue",
346 "XIDC",
347 "XID_Start",
348 "XIDS",
349};
350
351static BINARY_UNICODE_PROPERTIES_OF_STRINGS: Set<&'static str> = phf_set! {
354 "Basic_Emoji",
355 "Emoji_Keycap_Sequence",
356 "RGI_Emoji_Modifier_Sequence",
357 "RGI_Emoji_Flag_Sequence",
358 "RGI_Emoji_Tag_Sequence",
359 "RGI_Emoji_ZWJ_Sequence",
360 "RGI_Emoji",
361};
362