icu_locale/
exemplar_chars.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module provides APIs for getting exemplar characters for a locale.
6//!
7//! Exemplars are characters used by a language, separated into different sets.
8//! The sets are: main, auxiliary, punctuation, numbers, and index.
9//!
10//! The sets define, according to typical usage in the language,
11//! which characters occur in which contexts with which frequency.
12//! For more information, see the documentation in the
13//! [Exemplars section in Unicode Technical Standard #35](https://unicode.org/reports/tr35/tr35-general.html#Exemplars)
14//! of the LDML specification.
15//!
16//! # Examples
17//!
18//! ```
19//! use icu::locale::exemplar_chars::ExemplarCharacters;
20//! use icu::locale::locale;
21//!
22//! let locale = locale!("en-001").into();
23//! let exemplars_main = ExemplarCharacters::try_new_main(&locale)
24//!     .expect("locale should be present");
25//!
26//! assert!(exemplars_main.contains('a'));
27//! assert!(exemplars_main.contains('z'));
28//! assert!(exemplars_main.contains_str("a"));
29//! assert!(!exemplars_main.contains_str("ä"));
30//! assert!(!exemplars_main.contains_str("ng"));
31//! ```
32
33use crate::provider::*;
34use core::ops::Deref;
35use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
36use icu_provider::{marker::ErasedMarker, prelude::*};
37
38/// A wrapper around `UnicodeSet` data (characters and strings)
39#[derive(Debug)]
40pub struct ExemplarCharacters {
41    data: DataPayload<ErasedMarker<ExemplarCharactersData<'static>>>,
42}
43
44impl ExemplarCharacters {
45    /// Construct a borrowed version of this type that can be queried.
46    ///
47    /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
48    /// up front.
49    #[inline]
50    pub fn as_borrowed(&self) -> ExemplarCharactersBorrowed<'_> {
51        ExemplarCharactersBorrowed {
52            data: self.data.get(),
53        }
54    }
55}
56
57/// A borrowed wrapper around code point set data, returned by
58/// [`ExemplarCharacters::as_borrowed()`]. More efficient to query.
59#[derive(Clone, Copy, Debug)]
60pub struct ExemplarCharactersBorrowed<'a> {
61    data: &'a ExemplarCharactersData<'a>,
62}
63
64impl<'a> Deref for ExemplarCharactersBorrowed<'a> {
65    type Target = CodePointInversionListAndStringList<'a>;
66
67    fn deref(&self) -> &Self::Target {
68        &self.data.0
69    }
70}
71
72impl ExemplarCharactersBorrowed<'static> {
73    /// Cheaply converts a [`ExemplarCharactersBorrowed<'static>`] into a [`ExemplarCharacters`].
74    ///
75    /// Note: Due to branching and indirection, using [`ExemplarCharacters`] might inhibit some
76    /// compile-time optimizations that are possible with [`ExemplarCharactersBorrowed`].
77    pub const fn static_to_owned(self) -> ExemplarCharacters {
78        ExemplarCharacters {
79            data: DataPayload::from_static_ref(self.data),
80        }
81    }
82}
83
// Generates the constructors for one exemplar character set.
//
// For the given `data_marker`, this expands to:
// - `ExemplarCharactersBorrowed::<'static>::$compiled`, which loads from the baked provider
//   (only with the `compiled_data` Cargo feature),
// - `ExemplarCharacters::$compiled`, which forwards to the borrowed constructor, and
// - `ExemplarCharacters::$unstable`, which loads from a caller-supplied `DataProvider`.
//
// The attributes (typically doc comments) written before `pub fn $compiled();` are attached
// to both `$compiled` constructors.
macro_rules! make_exemplar_chars_unicode_set_property {
    (
        // currently unused
        dyn_data_marker: $d:ident;
        data_marker: $data_marker:ty;
        func:
        pub fn $unstable:ident();
        $(#[$attr:meta])*
        pub fn $compiled:ident();
    ) => {
        impl ExemplarCharactersBorrowed<'static> {
            $(#[$attr])*
            #[cfg(feature = "compiled_data")]
            #[inline]
            pub fn $compiled(
                locale: &DataLocale,
            ) -> Result<Self, DataError> {
                let request = DataRequest {
                    id: DataIdentifierBorrowed::for_locale(locale),
                    ..Default::default()
                };
                // The borrowed type stores a `&'static` reference, so the payload must be static.
                let data = DataProvider::<$data_marker>::load(&crate::provider::Baked, request)?
                    .payload
                    .get_static()
                    .ok_or_else(|| DataError::custom("Baked provider didn't return static payload"))?;
                Ok(ExemplarCharactersBorrowed { data })
            }

        }
        impl ExemplarCharacters {
            $(#[$attr])*
            #[cfg(feature = "compiled_data")]
            pub fn $compiled(
                locale: &DataLocale,
            ) -> Result<ExemplarCharactersBorrowed<'static>, DataError> {
                ExemplarCharactersBorrowed::$compiled(locale)
            }

            #[doc = concat!("A version of [`Self::", stringify!($compiled), "()`] that uses custom data provided by a [`DataProvider`].")]
            ///
            /// [📚 Help choosing a constructor](icu_provider::constructors)
            pub fn $unstable(
                provider: &(impl DataProvider<$data_marker> + ?Sized),
                locale: &DataLocale,
            ) -> Result<Self, DataError> {
                let request = DataRequest {
                    id: DataIdentifierBorrowed::for_locale(locale),
                    ..Default::default()
                };
                let response = provider.load(request)?;
                Ok(Self {
                    // Cast from the specific marker to the erased marker stored in the struct.
                    data: response.payload.cast(),
                })
            }
        }
    }
}
145
146make_exemplar_chars_unicode_set_property!(
147    dyn_data_marker: ExemplarCharactersMain;
148    data_marker: LocaleExemplarCharactersMainV1;
149    func:
150    pub fn try_new_main_unstable();
151
152    /// Get the "main" set of exemplar characters.
153    ///
154    /// โœจ *Enabled with the `compiled_data` Cargo feature.*
155    ///
156    /// [๐Ÿ“š Help choosing a constructor](icu_provider::constructors)
157    ///
158    /// # Examples
159    ///
160    /// ```
161    /// use icu::locale::locale;
162    /// use icu::locale::exemplar_chars::ExemplarCharacters;
163    ///
164    /// let exemplars_main = ExemplarCharacters::try_new_main(&locale!("en").into())
165    ///     .expect("locale should be present");
166    ///
167    /// assert!(exemplars_main.contains('a'));
168    /// assert!(exemplars_main.contains('z'));
169    /// assert!(exemplars_main.contains_str("a"));
170    /// assert!(!exemplars_main.contains_str("รค"));
171    /// assert!(!exemplars_main.contains_str("ng"));
172    /// assert!(!exemplars_main.contains_str("A"));
173    /// ```
174    pub fn try_new_main();
175);
176
177make_exemplar_chars_unicode_set_property!(
178    dyn_data_marker: ExemplarCharactersAuxiliary;
179    data_marker: LocaleExemplarCharactersAuxiliaryV1;
180    func:
181    pub fn try_new_auxiliary_unstable();
182
183    /// Get the "auxiliary" set of exemplar characters.
184    ///
185    /// โœจ *Enabled with the `compiled_data` Cargo feature.*
186    ///
187    /// [๐Ÿ“š Help choosing a constructor](icu_provider::constructors)
188    ///
189    /// # Examples
190    ///
191    /// ```
192    /// use icu::locale::locale;
193    /// use icu::locale::exemplar_chars::ExemplarCharacters;
194    ///
195    /// let exemplars_auxiliary =
196    ///     ExemplarCharacters::try_new_auxiliary(&locale!("en").into())
197    ///     .expect("locale should be present");
198    ///
199    /// assert!(!exemplars_auxiliary.contains('a'));
200    /// assert!(!exemplars_auxiliary.contains('z'));
201    /// assert!(!exemplars_auxiliary.contains_str("a"));
202    /// assert!(exemplars_auxiliary.contains_str("รค"));
203    /// assert!(!exemplars_auxiliary.contains_str("ng"));
204    /// assert!(!exemplars_auxiliary.contains_str("A"));
205    /// ```
206    pub fn try_new_auxiliary();
207);
208
209make_exemplar_chars_unicode_set_property!(
210    dyn_data_marker: ExemplarCharactersPunctuation;
211    data_marker: LocaleExemplarCharactersPunctuationV1;
212    func:
213    pub fn try_new_punctuation_unstable();
214
215    /// Get the "punctuation" set of exemplar characters.
216    ///
217    /// โœจ *Enabled with the `compiled_data` Cargo feature.*
218    ///
219    /// [๐Ÿ“š Help choosing a constructor](icu_provider::constructors)
220    ///
221    /// # Examples
222    ///
223    /// ```
224    /// use icu::locale::locale;
225    /// use icu::locale::exemplar_chars::ExemplarCharacters;
226    ///
227    /// let exemplars_punctuation =
228    ///     ExemplarCharacters::try_new_punctuation(&locale!("en").into())
229    ///     .expect("locale should be present");
230    ///
231    /// assert!(!exemplars_punctuation.contains('0'));
232    /// assert!(!exemplars_punctuation.contains('9'));
233    /// assert!(!exemplars_punctuation.contains('%'));
234    /// assert!(exemplars_punctuation.contains(','));
235    /// assert!(exemplars_punctuation.contains('.'));
236    /// assert!(exemplars_punctuation.contains('!'));
237    /// assert!(exemplars_punctuation.contains('?'));
238    /// ```
239    pub fn try_new_punctuation();
240);
241
242make_exemplar_chars_unicode_set_property!(
243    dyn_data_marker: ExemplarCharactersNumbers;
244    data_marker: LocaleExemplarCharactersNumbersV1;
245    func:
246    pub fn try_new_numbers_unstable();
247
248    /// Get the "numbers" set of exemplar characters.
249    ///
250    /// โœจ *Enabled with the `compiled_data` Cargo feature.*
251    ///
252    /// [๐Ÿ“š Help choosing a constructor](icu_provider::constructors)
253    ///
254    /// # Examples
255    ///
256    /// ```
257    /// use icu::locale::locale;
258    /// use icu::locale::exemplar_chars::ExemplarCharacters;
259    ///
260    /// let exemplars_numbers =
261    ///     ExemplarCharacters::try_new_numbers(&locale!("en").into())
262    ///     .expect("locale should be present");
263    ///
264    /// assert!(exemplars_numbers.contains('0'));
265    /// assert!(exemplars_numbers.contains('9'));
266    /// assert!(exemplars_numbers.contains('%'));
267    /// assert!(exemplars_numbers.contains(','));
268    /// assert!(exemplars_numbers.contains('.'));
269    /// assert!(!exemplars_numbers.contains('!'));
270    /// assert!(!exemplars_numbers.contains('?'));
271    /// ```
272    pub fn try_new_numbers();
273);
274
275make_exemplar_chars_unicode_set_property!(
276    dyn_data_marker: ExemplarCharactersIndex;
277    data_marker: LocaleExemplarCharactersIndexV1;
278    func:
279    pub fn try_new_index_unstable();
280
281    /// Get the "index" set of exemplar characters.
282    ///
283    /// โœจ *Enabled with the `compiled_data` Cargo feature.*
284    ///
285    /// [๐Ÿ“š Help choosing a constructor](icu_provider::constructors)
286    ///
287    /// # Examples
288    ///
289    /// ```
290    /// use icu::locale::locale;
291    /// use icu::locale::exemplar_chars::ExemplarCharacters;
292    ///
293    /// let exemplars_index =
294    ///     ExemplarCharacters::try_new_index(&locale!("en").into())
295    ///     .expect("locale should be present");
296    ///
297    /// assert!(!exemplars_index.contains('a'));
298    /// assert!(!exemplars_index.contains('z'));
299    /// assert!(!exemplars_index.contains_str("a"));
300    /// assert!(!exemplars_index.contains_str("รค"));
301    /// assert!(!exemplars_index.contains_str("ng"));
302    /// assert!(exemplars_index.contains_str("A"));
303    /// ```
304    pub fn try_new_index();
305);