icu_locale/exemplar_chars.rs
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
//! This module provides APIs for getting exemplar characters for a locale.
//!
//! Exemplars are characters used by a language, separated into different sets.
//! The sets are: main, auxiliary, punctuation, numbers, and index.
//!
//! The sets define, according to typical usage in the language,
//! which characters occur in which contexts with which frequency.
//! For more information, see the documentation in the
//! [Exemplars section in Unicode Technical Standard #35](https://unicode.org/reports/tr35/tr35-general.html#Exemplars)
//! of the LDML specification.
//!
//! # Examples
//!
//! ```
//! use icu::locale::exemplar_chars::ExemplarCharacters;
//! use icu::locale::locale;
//!
//! let locale = locale!("en-001").into();
//! let exemplars_main = ExemplarCharacters::try_new_main(&locale)
//!     .expect("locale should be present");
//!
//! assert!(exemplars_main.contains('a'));
//! assert!(exemplars_main.contains('z'));
//! assert!(exemplars_main.contains_str("a"));
//! assert!(!exemplars_main.contains_str("ä"));
//! assert!(!exemplars_main.contains_str("ng"));
//! ```
32
33use crate::provider::*;
34use core::ops::Deref;
35use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
36use icu_provider::{marker::ErasedMarker, prelude::*};
37
38/// A wrapper around `UnicodeSet` data (characters and strings)
39#[derive(Debug)]
40pub struct ExemplarCharacters {
41 data: DataPayload<ErasedMarker<ExemplarCharactersData<'static>>>,
42}
43
44impl ExemplarCharacters {
45 /// Construct a borrowed version of this type that can be queried.
46 ///
47 /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
48 /// up front.
49 #[inline]
50 pub fn as_borrowed(&self) -> ExemplarCharactersBorrowed<'_> {
51 ExemplarCharactersBorrowed {
52 data: self.data.get(),
53 }
54 }
55}
56
57/// A borrowed wrapper around code point set data, returned by
58/// [`ExemplarCharacters::as_borrowed()`]. More efficient to query.
59#[derive(Clone, Copy, Debug)]
60pub struct ExemplarCharactersBorrowed<'a> {
61 data: &'a ExemplarCharactersData<'a>,
62}
63
64impl<'a> Deref for ExemplarCharactersBorrowed<'a> {
65 type Target = CodePointInversionListAndStringList<'a>;
66
67 fn deref(&self) -> &Self::Target {
68 &self.data.0
69 }
70}
71
72impl ExemplarCharactersBorrowed<'static> {
73 /// Cheaply converts a [`ExemplarCharactersBorrowed<'static>`] into a [`ExemplarCharacters`].
74 ///
75 /// Note: Due to branching and indirection, using [`ExemplarCharacters`] might inhibit some
76 /// compile-time optimizations that are possible with [`ExemplarCharactersBorrowed`].
77 pub const fn static_to_owned(self) -> ExemplarCharacters {
78 ExemplarCharacters {
79 data: DataPayload::from_static_ref(self.data),
80 }
81 }
82}
83
// Generates the constructors for one exemplar-character set.
//
// Inputs:
// * `dyn_data_marker`: accepted for call-site symmetry but currently unused.
// * `data_marker`: the data marker type to load.
// * `func`: the name of the custom-provider constructor, followed by the
//   attributes (typically doc comments) and name of the compiled-data constructor.
//
// Output:
// * `ExemplarCharactersBorrowed::<$compiled>(locale)`, which loads from the baked
//   provider and returns a `'static` borrowed view (only with `compiled_data`).
// * `ExemplarCharacters::<$compiled>(locale)`, which forwards to the above.
// * `ExemplarCharacters::<$unstable>(provider, locale)`, which loads from a
//   caller-supplied `DataProvider`.
macro_rules! make_exemplar_chars_unicode_set_property {
    (
        // currently unused
        dyn_data_marker: $d:ident;
        data_marker: $data_marker:ty;
        func:
        pub fn $unstable:ident();
        $(#[$attr:meta])*
        pub fn $compiled:ident();
    ) => {
        impl ExemplarCharactersBorrowed<'static> {
            $(#[$attr])*
            #[cfg(feature = "compiled_data")]
            #[inline]
            pub fn $compiled(
                locale: &DataLocale,
            ) -> Result<Self, DataError> {
                Ok(ExemplarCharactersBorrowed {
                    data: DataProvider::<$data_marker>::load(
                        &crate::provider::Baked,
                        DataRequest {
                            id: DataIdentifierBorrowed::for_locale(locale),
                            ..Default::default()
                        })?
                    .payload
                    // A non-static payload is reported as an error rather than a panic.
                    .get_static()
                    .ok_or_else(|| DataError::custom("Baked provider didn't return static payload"))?
                })
            }

        }
        impl ExemplarCharacters {
            $(#[$attr])*
            #[cfg(feature = "compiled_data")]
            pub fn $compiled(
                locale: &DataLocale,
            ) -> Result<ExemplarCharactersBorrowed<'static>, DataError> {
                ExemplarCharactersBorrowed::$compiled(locale)
            }

            #[doc = concat!("A version of [`Self::", stringify!($compiled), "()`] that uses custom data provided by a [`DataProvider`].")]
            ///
            /// [📚 Help choosing a constructor](icu_provider::constructors)
            pub fn $unstable(
                provider: &(impl DataProvider<$data_marker> + ?Sized),
                locale: &DataLocale,
            ) -> Result<Self, DataError> {
                Ok(Self {
                    data:
                        provider.load(
                            DataRequest {
                                id: DataIdentifierBorrowed::for_locale(locale),
                                ..Default::default()
                        })?
                        .payload
                        // Convert the marker-specific payload to the erased marker stored in `Self`.
                        .cast()
                })
            }
        }
    }
}
145
146make_exemplar_chars_unicode_set_property!(
147 dyn_data_marker: ExemplarCharactersMain;
148 data_marker: LocaleExemplarCharactersMainV1;
149 func:
150 pub fn try_new_main_unstable();
151
152 /// Get the "main" set of exemplar characters.
153 ///
154 /// โจ *Enabled with the `compiled_data` Cargo feature.*
155 ///
156 /// [๐ Help choosing a constructor](icu_provider::constructors)
157 ///
158 /// # Examples
159 ///
160 /// ```
161 /// use icu::locale::locale;
162 /// use icu::locale::exemplar_chars::ExemplarCharacters;
163 ///
164 /// let exemplars_main = ExemplarCharacters::try_new_main(&locale!("en").into())
165 /// .expect("locale should be present");
166 ///
167 /// assert!(exemplars_main.contains('a'));
168 /// assert!(exemplars_main.contains('z'));
169 /// assert!(exemplars_main.contains_str("a"));
170 /// assert!(!exemplars_main.contains_str("รค"));
171 /// assert!(!exemplars_main.contains_str("ng"));
172 /// assert!(!exemplars_main.contains_str("A"));
173 /// ```
174 pub fn try_new_main();
175);
176
177make_exemplar_chars_unicode_set_property!(
178 dyn_data_marker: ExemplarCharactersAuxiliary;
179 data_marker: LocaleExemplarCharactersAuxiliaryV1;
180 func:
181 pub fn try_new_auxiliary_unstable();
182
183 /// Get the "auxiliary" set of exemplar characters.
184 ///
185 /// โจ *Enabled with the `compiled_data` Cargo feature.*
186 ///
187 /// [๐ Help choosing a constructor](icu_provider::constructors)
188 ///
189 /// # Examples
190 ///
191 /// ```
192 /// use icu::locale::locale;
193 /// use icu::locale::exemplar_chars::ExemplarCharacters;
194 ///
195 /// let exemplars_auxiliary =
196 /// ExemplarCharacters::try_new_auxiliary(&locale!("en").into())
197 /// .expect("locale should be present");
198 ///
199 /// assert!(!exemplars_auxiliary.contains('a'));
200 /// assert!(!exemplars_auxiliary.contains('z'));
201 /// assert!(!exemplars_auxiliary.contains_str("a"));
202 /// assert!(exemplars_auxiliary.contains_str("รค"));
203 /// assert!(!exemplars_auxiliary.contains_str("ng"));
204 /// assert!(!exemplars_auxiliary.contains_str("A"));
205 /// ```
206 pub fn try_new_auxiliary();
207);
208
209make_exemplar_chars_unicode_set_property!(
210 dyn_data_marker: ExemplarCharactersPunctuation;
211 data_marker: LocaleExemplarCharactersPunctuationV1;
212 func:
213 pub fn try_new_punctuation_unstable();
214
215 /// Get the "punctuation" set of exemplar characters.
216 ///
217 /// โจ *Enabled with the `compiled_data` Cargo feature.*
218 ///
219 /// [๐ Help choosing a constructor](icu_provider::constructors)
220 ///
221 /// # Examples
222 ///
223 /// ```
224 /// use icu::locale::locale;
225 /// use icu::locale::exemplar_chars::ExemplarCharacters;
226 ///
227 /// let exemplars_punctuation =
228 /// ExemplarCharacters::try_new_punctuation(&locale!("en").into())
229 /// .expect("locale should be present");
230 ///
231 /// assert!(!exemplars_punctuation.contains('0'));
232 /// assert!(!exemplars_punctuation.contains('9'));
233 /// assert!(!exemplars_punctuation.contains('%'));
234 /// assert!(exemplars_punctuation.contains(','));
235 /// assert!(exemplars_punctuation.contains('.'));
236 /// assert!(exemplars_punctuation.contains('!'));
237 /// assert!(exemplars_punctuation.contains('?'));
238 /// ```
239 pub fn try_new_punctuation();
240);
241
242make_exemplar_chars_unicode_set_property!(
243 dyn_data_marker: ExemplarCharactersNumbers;
244 data_marker: LocaleExemplarCharactersNumbersV1;
245 func:
246 pub fn try_new_numbers_unstable();
247
248 /// Get the "numbers" set of exemplar characters.
249 ///
250 /// โจ *Enabled with the `compiled_data` Cargo feature.*
251 ///
252 /// [๐ Help choosing a constructor](icu_provider::constructors)
253 ///
254 /// # Examples
255 ///
256 /// ```
257 /// use icu::locale::locale;
258 /// use icu::locale::exemplar_chars::ExemplarCharacters;
259 ///
260 /// let exemplars_numbers =
261 /// ExemplarCharacters::try_new_numbers(&locale!("en").into())
262 /// .expect("locale should be present");
263 ///
264 /// assert!(exemplars_numbers.contains('0'));
265 /// assert!(exemplars_numbers.contains('9'));
266 /// assert!(exemplars_numbers.contains('%'));
267 /// assert!(exemplars_numbers.contains(','));
268 /// assert!(exemplars_numbers.contains('.'));
269 /// assert!(!exemplars_numbers.contains('!'));
270 /// assert!(!exemplars_numbers.contains('?'));
271 /// ```
272 pub fn try_new_numbers();
273);
274
275make_exemplar_chars_unicode_set_property!(
276 dyn_data_marker: ExemplarCharactersIndex;
277 data_marker: LocaleExemplarCharactersIndexV1;
278 func:
279 pub fn try_new_index_unstable();
280
281 /// Get the "index" set of exemplar characters.
282 ///
283 /// โจ *Enabled with the `compiled_data` Cargo feature.*
284 ///
285 /// [๐ Help choosing a constructor](icu_provider::constructors)
286 ///
287 /// # Examples
288 ///
289 /// ```
290 /// use icu::locale::locale;
291 /// use icu::locale::exemplar_chars::ExemplarCharacters;
292 ///
293 /// let exemplars_index =
294 /// ExemplarCharacters::try_new_index(&locale!("en").into())
295 /// .expect("locale should be present");
296 ///
297 /// assert!(!exemplars_index.contains('a'));
298 /// assert!(!exemplars_index.contains('z'));
299 /// assert!(!exemplars_index.contains_str("a"));
300 /// assert!(!exemplars_index.contains_str("รค"));
301 /// assert!(!exemplars_index.contains_str("ng"));
302 /// assert!(exemplars_index.contains_str("A"));
303 /// ```
304 pub fn try_new_index();
305);