icu_locale_core/extensions/unicode/
keywords.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use core::borrow::Borrow;
6use core::cmp::Ordering;
7#[cfg(feature = "alloc")]
8use core::iter::FromIterator;
9#[cfg(feature = "alloc")]
10use core::str::FromStr;
11use litemap::LiteMap;
12
13use super::Key;
14use super::Value;
15#[cfg(feature = "alloc")]
16use crate::parser::ParseError;
17#[cfg(feature = "alloc")]
18use crate::parser::SubtagIterator;
19use crate::shortvec::ShortBoxSlice;
20
21/// A list of [`Key`]-[`Value`] pairs representing functional information
22/// about locale's internationalization preferences.
23///
24/// Here are examples of fields used in Unicode:
25/// - `hc` - Hour Cycle (`h11`, `h12`, `h23`, `h24`)
26/// - `ca` - Calendar (`buddhist`, `gregory`, ...)
27/// - `fw` - First Day Of the Week (`sun`, `mon`, `sat`, ...)
28///
29/// You can find the full list in [`Unicode BCP 47 U Extension`] section of LDML.
30///
31/// [`Unicode BCP 47 U Extension`]: https://unicode.org/reports/tr35/tr35.html#Key_And_Type_Definitions_
32///
33/// # Examples
34///
35/// Manually build up a [`Keywords`] object:
36///
37/// ```
38/// use icu::locale::extensions::unicode::{key, value, Keywords};
39///
40/// let keywords = [(key!("hc"), value!("h23"))]
41///     .into_iter()
42///     .collect::<Keywords>();
43///
44/// assert_eq!(&keywords.to_string(), "hc-h23");
45/// ```
46///
47/// Access a [`Keywords`] object from a [`Locale`]:
48///
49/// ```
50/// use icu::locale::{
51///     extensions::unicode::{key, value},
52///     Locale,
53/// };
54///
55/// let loc: Locale = "und-u-hc-h23-kc-true".parse().expect("Valid BCP-47");
56///
57/// assert_eq!(loc.extensions.unicode.keywords.get(&key!("ca")), None);
58/// assert_eq!(
59///     loc.extensions.unicode.keywords.get(&key!("hc")),
60///     Some(&value!("h23"))
61/// );
62/// assert_eq!(
63///     loc.extensions.unicode.keywords.get(&key!("kc")),
64///     Some(&value!("true"))
65/// );
66///
67/// assert_eq!(loc.extensions.unicode.keywords.to_string(), "hc-h23-kc");
68/// ```
69///
70/// [`Locale`]: crate::Locale
71#[derive(Clone, PartialEq, Eq, Debug, Default, Hash, PartialOrd, Ord)]
72pub struct Keywords(LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>);
73
74impl Keywords {
75    /// Returns a new empty list of key-value pairs. Same as [`default()`](Default::default()), but is `const`.
76    ///
77    /// # Examples
78    ///
79    /// ```
80    /// use icu::locale::extensions::unicode::Keywords;
81    ///
82    /// assert_eq!(Keywords::new(), Keywords::default());
83    /// ```
84    #[inline]
85    pub const fn new() -> Self {
86        Self(LiteMap::new())
87    }
88
89    /// Create a new list of key-value pairs having exactly one pair, callable in a `const` context.
90    #[inline]
91    pub const fn new_single(key: Key, value: Value) -> Self {
92        Self(LiteMap::from_sorted_store_unchecked(
93            ShortBoxSlice::new_single((key, value)),
94        ))
95    }
96
97    /// A constructor which takes a str slice, parses it and
98    /// produces a well-formed [`Keywords`].
99    #[inline]
100    #[cfg(feature = "alloc")]
101    pub fn try_from_str(s: &str) -> Result<Self, ParseError> {
102        Self::try_from_utf8(s.as_bytes())
103    }
104
105    /// See [`Self::try_from_str`]
106    #[cfg(feature = "alloc")]
107    pub fn try_from_utf8(code_units: &[u8]) -> Result<Self, ParseError> {
108        let mut iter = SubtagIterator::new(code_units);
109        Self::try_from_iter(&mut iter)
110    }
111
112    /// Returns `true` if there are no keywords.
113    ///
114    /// # Examples
115    ///
116    /// ```
117    /// use icu::locale::locale;
118    /// use icu::locale::Locale;
119    ///
120    /// let loc1 = Locale::try_from_str("und-t-h0-hybrid").unwrap();
121    /// let loc2 = locale!("und-u-ca-buddhist");
122    ///
123    /// assert!(loc1.extensions.unicode.keywords.is_empty());
124    /// assert!(!loc2.extensions.unicode.keywords.is_empty());
125    /// ```
126    pub fn is_empty(&self) -> bool {
127        self.0.is_empty()
128    }
129
130    /// Returns `true` if the list contains a [`Value`] for the specified [`Key`].
131    ///
132    ///
133    /// # Examples
134    ///
135    /// ```
136    /// use icu::locale::extensions::unicode::{key, value, Keywords};
137    ///
138    /// let keywords = [(key!("ca"), value!("gregory"))]
139    ///     .into_iter()
140    ///     .collect::<Keywords>();
141    ///
142    /// assert!(&keywords.contains_key(&key!("ca")));
143    /// ```
144    pub fn contains_key<Q>(&self, key: &Q) -> bool
145    where
146        Key: Borrow<Q>,
147        Q: Ord,
148    {
149        self.0.contains_key(key)
150    }
151
152    /// Returns a reference to the [`Value`] corresponding to the [`Key`].
153    ///
154    ///
155    /// # Examples
156    ///
157    /// ```
158    /// use icu::locale::extensions::unicode::{key, value, Keywords};
159    ///
160    /// let keywords = [(key!("ca"), value!("buddhist"))]
161    ///     .into_iter()
162    ///     .collect::<Keywords>();
163    ///
164    /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("buddhist")));
165    /// ```
166    pub fn get<Q>(&self, key: &Q) -> Option<&Value>
167    where
168        Key: Borrow<Q>,
169        Q: Ord,
170    {
171        self.0.get(key)
172    }
173
174    /// Returns a mutable reference to the [`Value`] corresponding to the [`Key`].
175    ///
176    /// Returns `None` if the key doesn't exist or if the key has no value.
177    ///
178    /// # Examples
179    ///
180    /// ```
181    /// use icu::locale::extensions::unicode::{key, value, Keywords};
182    ///
183    /// let mut keywords = [(key!("ca"), value!("buddhist"))]
184    ///     .into_iter()
185    ///     .collect::<Keywords>();
186    ///
187    /// if let Some(value) = keywords.get_mut(&key!("ca")) {
188    ///     *value = value!("gregory");
189    /// }
190    /// assert_eq!(keywords.get(&key!("ca")), Some(&value!("gregory")));
191    /// ```
192    #[cfg(feature = "alloc")]
193    pub fn get_mut<Q>(&mut self, key: &Q) -> Option<&mut Value>
194    where
195        Key: Borrow<Q>,
196        Q: Ord,
197    {
198        self.0.get_mut(key)
199    }
200
201    /// Sets the specified keyword, returning the old value if it already existed.
202    ///
203    /// # Examples
204    ///
205    /// ```
206    /// use icu::locale::extensions::unicode::{key, value};
207    /// use icu::locale::Locale;
208    ///
209    /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
210    ///     .parse()
211    ///     .expect("valid BCP-47 identifier");
212    /// let old_value = loc
213    ///     .extensions
214    ///     .unicode
215    ///     .keywords
216    ///     .set(key!("ca"), value!("japanese"));
217    ///
218    /// assert_eq!(old_value, Some(value!("buddhist")));
219    /// assert_eq!(loc, "und-u-hello-ca-japanese-hc-h12".parse().unwrap());
220    /// ```
221    #[cfg(feature = "alloc")]
222    pub fn set(&mut self, key: Key, value: Value) -> Option<Value> {
223        self.0.insert(key, value)
224    }
225
226    /// Removes the specified keyword, returning the old value if it existed.
227    ///
228    /// # Examples
229    ///
230    /// ```
231    /// use icu::locale::extensions::unicode::key;
232    /// use icu::locale::Locale;
233    ///
234    /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12"
235    ///     .parse()
236    ///     .expect("valid BCP-47 identifier");
237    /// loc.extensions.unicode.keywords.remove(key!("ca"));
238    /// assert_eq!(loc, "und-u-hello-hc-h12".parse().unwrap());
239    /// ```
240    #[cfg(feature = "alloc")]
241    pub fn remove<Q: Borrow<Key>>(&mut self, key: Q) -> Option<Value> {
242        self.0.remove(key.borrow())
243    }
244
245    /// Clears all Unicode extension keywords, leaving Unicode attributes.
246    ///
247    /// Returns the old Unicode extension keywords.
248    ///
249    /// # Examples
250    ///
251    /// ```
252    /// use icu::locale::Locale;
253    ///
254    /// let mut loc: Locale = "und-u-hello-ca-buddhist-hc-h12".parse().unwrap();
255    /// loc.extensions.unicode.keywords.clear();
256    /// assert_eq!(loc, "und-u-hello".parse().unwrap());
257    /// ```
258    pub fn clear(&mut self) -> Self {
259        core::mem::take(self)
260    }
261
262    /// Retains a subset of keywords as specified by the predicate function.
263    ///
264    /// # Examples
265    ///
266    /// ```
267    /// use icu::locale::extensions::unicode::key;
268    /// use icu::locale::Locale;
269    ///
270    /// let mut loc: Locale = "und-u-ca-buddhist-hc-h12-ms-metric".parse().unwrap();
271    ///
272    /// loc.extensions
273    ///     .unicode
274    ///     .keywords
275    ///     .retain_by_key(|&k| k == key!("hc"));
276    /// assert_eq!(loc, "und-u-hc-h12".parse().unwrap());
277    ///
278    /// loc.extensions
279    ///     .unicode
280    ///     .keywords
281    ///     .retain_by_key(|&k| k == key!("ms"));
282    /// assert_eq!(loc, Locale::UNKNOWN);
283    /// ```
284    #[cfg(feature = "alloc")]
285    pub fn retain_by_key<F>(&mut self, mut predicate: F)
286    where
287        F: FnMut(&Key) -> bool,
288    {
289        self.0.retain(|k, _| predicate(k))
290    }
291
292    /// Compare this [`Keywords`] with BCP-47 bytes.
293    ///
294    /// The return value is equivalent to what would happen if you first converted this
295    /// [`Keywords`] to a BCP-47 string and then performed a byte comparison.
296    ///
297    /// This function is case-sensitive and results in a *total order*, so it is appropriate for
298    /// binary search. The only argument producing [`Ordering::Equal`] is `self.to_string()`.
299    ///
300    /// # Examples
301    ///
302    /// ```
303    /// use icu::locale::Locale;
304    /// use std::cmp::Ordering;
305    ///
306    /// let bcp47_strings: &[&str] =
307    ///     &["ca-hebrew", "ca-japanese", "ca-japanese-nu-latn", "nu-latn"];
308    ///
309    /// for ab in bcp47_strings.windows(2) {
310    ///     let a = ab[0];
311    ///     let b = ab[1];
312    ///     assert!(a.cmp(b) == Ordering::Less);
313    ///     let a_kwds = format!("und-u-{}", a)
314    ///         .parse::<Locale>()
315    ///         .unwrap()
316    ///         .extensions
317    ///         .unicode
318    ///         .keywords;
319    ///     assert!(a_kwds.strict_cmp(a.as_bytes()) == Ordering::Equal);
320    ///     assert!(a_kwds.strict_cmp(b.as_bytes()) == Ordering::Less);
321    /// }
322    /// ```
323    pub fn strict_cmp(&self, other: &[u8]) -> Ordering {
324        writeable::cmp_utf8(self, other)
325    }
326
327    #[cfg(feature = "alloc")]
328    pub(crate) fn try_from_iter(iter: &mut SubtagIterator) -> Result<Self, ParseError> {
329        let mut keywords = LiteMap::new();
330
331        let mut current_keyword = None;
332        let mut current_value = ShortBoxSlice::new();
333
334        while let Some(subtag) = iter.peek() {
335            let slen = subtag.len();
336            if slen == 2 {
337                if let Some(kw) = current_keyword.take() {
338                    keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
339                    current_value = ShortBoxSlice::new();
340                }
341                current_keyword = Some(Key::try_from_utf8(subtag)?);
342            } else if current_keyword.is_some() {
343                match Value::parse_subtag_from_utf8(subtag) {
344                    Ok(Some(t)) => current_value.push(t),
345                    Ok(None) => {}
346                    Err(_) => break,
347                }
348            } else {
349                break;
350            }
351            iter.next();
352        }
353
354        if let Some(kw) = current_keyword.take() {
355            keywords.try_insert(kw, Value::from_short_slice_unchecked(current_value));
356        }
357
358        Ok(keywords.into())
359    }
360
361    /// Produce an ordered iterator over key-value pairs
362    pub fn iter(&self) -> impl Iterator<Item = (&Key, &Value)> {
363        self.0.iter()
364    }
365
366    pub(crate) fn for_each_subtag_str<E, F>(&self, f: &mut F) -> Result<(), E>
367    where
368        F: FnMut(&str) -> Result<(), E>,
369    {
370        for (k, v) in self.0.iter() {
371            f(k.as_str())?;
372            v.for_each_subtag_str(f)?;
373        }
374        Ok(())
375    }
376
377    /// This needs to be its own method to help with type inference in helpers.rs
378    #[cfg(test)]
379    pub(crate) fn from_tuple_vec(v: Vec<(Key, Value)>) -> Self {
380        v.into_iter().collect()
381    }
382}
383
384impl From<LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>> for Keywords {
385    fn from(map: LiteMap<Key, Value, ShortBoxSlice<(Key, Value)>>) -> Self {
386        Self(map)
387    }
388}
389
/// Collects `(Key, Value)` pairs into a [`Keywords`] by first collecting them
/// into the underlying map.
#[cfg(feature = "alloc")]
impl FromIterator<(Key, Value)> for Keywords {
    fn from_iter<I: IntoIterator<Item = (Key, Value)>>(iter: I) -> Self {
        Self(iter.into_iter().collect())
    }
}
396
/// Enables `str::parse::<Keywords>()`; parsing is done by [`Keywords::try_from_str`].
#[cfg(feature = "alloc")]
impl FromStr for Keywords {
    type Err = ParseError;

    #[inline]
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Keywords::try_from_str(s)
    }
}
406
// NOTE(review): this macro is defined elsewhere in the crate. It presumably generates the
// `Writeable`/`Display` impls for `Keywords` (which `strict_cmp` and `to_string()` rely on),
// and the string literals look like sample inputs for the generated docs/tests — confirm
// against the macro definition.
impl_writeable_for_key_value!(Keywords, "ca", "islamic-civil", "mm", "mm");
408
#[cfg(test)]
mod tests {
    use super::*;

    /// Parsing a single keyword via `str::parse` and writing it back out
    /// reproduces the input string.
    #[test]
    fn test_keywords_fromstr() {
        let parsed = "hc-h12"
            .parse::<Keywords>()
            .expect("Failed to parse Keywords");
        let rendered = parsed.to_string();
        assert_eq!(rendered, "hc-h12");
    }
}
418}