icu_properties/
emoji.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::*;
6use icu_collections::codepointinvliststringlist::CodePointInversionListAndStringList;
7use icu_provider::marker::ErasedMarker;
8use icu_provider::prelude::*;
9
/// An owned wrapper around `UnicodeSet` data (characters and strings).
///
/// The data is held as a [`DataPayload`] of [`PropertyUnicodeSet`]. Membership
/// queries are made on the borrowed form, obtained through
/// [`EmojiSetData::as_borrowed`].
#[derive(Debug)]
pub struct EmojiSetData {
    // Stored under `ErasedMarker`, so the data marker the payload was loaded
    // with is not part of this type.
    data: DataPayload<ErasedMarker<PropertyUnicodeSet<'static>>>,
}
15
16impl EmojiSetData {
17    /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`].
18    ///
19    /// See the documentation on [`EmojiSet`] implementations for details.
20    ///
21    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
22    ///
23    /// [📚 Help choosing a constructor](icu_provider::constructors)
24    #[cfg(feature = "compiled_data")]
25    #[allow(clippy::new_ret_no_self)]
26    pub const fn new<P: EmojiSet>() -> EmojiSetDataBorrowed<'static> {
27        EmojiSetDataBorrowed::new::<P>()
28    }
29
30    /// A version of `new()` that uses custom data provided by a [`DataProvider`].
31    ///
32    /// Note that this will return an owned version of the data. Functionality is available on
33    /// the borrowed version, accessible through [`EmojiSetData::as_borrowed`].
34    pub fn try_new_unstable<P: EmojiSet>(
35        provider: &(impl DataProvider<P::DataMarker> + ?Sized),
36    ) -> Result<EmojiSetData, DataError> {
37        Ok(EmojiSetData::from_data(
38            provider.load(Default::default())?.payload,
39        ))
40    }
41
42    /// Construct a borrowed version of this type that can be queried.
43    ///
44    /// This avoids a potential small underlying cost per API call (ex: `contains()`) by consolidating it
45    /// up front.
46    #[inline]
47    pub fn as_borrowed(&self) -> EmojiSetDataBorrowed<'_> {
48        EmojiSetDataBorrowed {
49            set: self.data.get(),
50        }
51    }
52
53    /// Construct a new one from loaded data
54    ///
55    /// Typically it is preferable to use getters instead
56    pub(crate) fn from_data<M>(data: DataPayload<M>) -> Self
57    where
58        M: DynamicDataMarker<DataStruct = PropertyUnicodeSet<'static>>,
59    {
60        Self { data: data.cast() }
61    }
62
63    /// Construct a new owned [`CodePointInversionListAndStringList`]
64    pub fn from_code_point_inversion_list_string_list(
65        set: CodePointInversionListAndStringList<'static>,
66    ) -> Self {
67        let set = PropertyUnicodeSet::from_code_point_inversion_list_string_list(set);
68        EmojiSetData::from_data(
69            DataPayload::<ErasedMarker<PropertyUnicodeSet<'static>>>::from_owned(set),
70        )
71    }
72
73    /// Convert this type to a [`CodePointInversionListAndStringList`] as a borrowed value.
74    ///
75    /// The data backing this is extensible and supports multiple implementations.
76    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
77    /// added, and users may select which at data generation time.
78    ///
79    /// This method returns an `Option` in order to return `None` when the backing data provider
80    /// cannot return a [`CodePointInversionListAndStringList`], or cannot do so within the expected constant time
81    /// constraint.
82    pub fn as_code_point_inversion_list_string_list(
83        &self,
84    ) -> Option<&CodePointInversionListAndStringList<'_>> {
85        self.data.get().as_code_point_inversion_list_string_list()
86    }
87
88    /// Convert this type to a [`CodePointInversionListAndStringList`], borrowing if possible,
89    /// otherwise allocating a new [`CodePointInversionListAndStringList`].
90    ///
91    /// The data backing this is extensible and supports multiple implementations.
92    /// Currently it is always [`CodePointInversionListAndStringList`]; however in the future more backends may be
93    /// added, and users may select which at data generation time.
94    ///
95    /// The performance of the conversion to this specific return type will vary
96    /// depending on the data structure that is backing `self`.
97    pub fn to_code_point_inversion_list_string_list(
98        &self,
99    ) -> CodePointInversionListAndStringList<'_> {
100        self.data.get().to_code_point_inversion_list_string_list()
101    }
102}
103
/// A borrowed wrapper around `UnicodeSet` data (characters and strings), returned by
/// [`EmojiSetData::as_borrowed()`]. More efficient to query.
///
/// The type is `Copy`, so its query methods take `self` by value.
#[derive(Clone, Copy, Debug)]
pub struct EmojiSetDataBorrowed<'a> {
    // Reference to the underlying set; every query method delegates to it.
    set: &'a PropertyUnicodeSet<'a>,
}
110
111impl EmojiSetDataBorrowed<'_> {
112    /// Check if the set contains the string. Strings consisting of one character
113    /// are treated as a character/code point.
114    ///
115    /// This matches ICU behavior for ICU's `UnicodeSet`.
116    #[inline]
117    pub fn contains_str(self, s: &str) -> bool {
118        self.set.contains_str(s)
119    }
120
121    /// Check if the set contains the code point.
122    #[inline]
123    pub fn contains(self, ch: char) -> bool {
124        self.set.contains(ch)
125    }
126
127    /// See [`Self::contains`].
128    #[inline]
129    pub fn contains32(self, cp: u32) -> bool {
130        self.set.contains32(cp)
131    }
132}
133
134impl EmojiSetDataBorrowed<'static> {
135    /// Creates a new [`EmojiSetDataBorrowed`] for a [`EmojiSet`].
136    ///
137    /// See the documentation on [`EmojiSet`] implementations for details.
138    ///
139    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
140    ///
141    /// [📚 Help choosing a constructor](icu_provider::constructors)
142    #[inline]
143    #[cfg(feature = "compiled_data")]
144    pub const fn new<P: EmojiSet>() -> Self {
145        EmojiSetDataBorrowed { set: P::SINGLETON }
146    }
147
148    /// Cheaply converts a [`EmojiSetDataBorrowed<'static>`] into a [`EmojiSetData`].
149    ///
150    /// Note: Due to branching and indirection, using [`EmojiSetData`] might inhibit some
151    /// compile-time optimizations that are possible with [`EmojiSetDataBorrowed`].
152    pub const fn static_to_owned(self) -> EmojiSetData {
153        EmojiSetData {
154            data: DataPayload::from_static_ref(self.set),
155        }
156    }
157}
158
/// An Emoji set as defined by [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/#Emoji_Sets).
///
/// <div class="stab unstable">
/// 🚫 This trait is sealed; it cannot be implemented by user code. If an API requests an item that implements this
/// trait, please consider using a type from the implementors listed below.
/// </div>
pub trait EmojiSet: crate::private::Sealed {
    /// Data marker used to load this set from a [`DataProvider`]
    /// (see [`EmojiSetData::try_new_unstable`]).
    #[doc(hidden)]
    type DataMarker: DataMarker<DataStruct = PropertyUnicodeSet<'static>>;
    /// Statically available data for this set, used by [`EmojiSetDataBorrowed::new`].
    /// Only present with the `compiled_data` Cargo feature.
    #[doc(hidden)]
    #[cfg(feature = "compiled_data")]
    const SINGLETON: &'static PropertyUnicodeSet<'static>;
}