icu_plurals/
provider.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5// Provider structs must be stable
6#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
7
8//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
9//!
10//! <div class="stab unstable">
11//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
12//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
13//! to be stable, their Rust representation might not be. Use with caution.
14//! </div>
15//!
16//! Read more about data providers: [`icu_provider`]
17
18use crate::provider::rules::runtime::ast::Rule;
19use crate::{PluralCategory, PluralElements, PluralElementsInner, PluralOperands, PluralRules};
20use alloc::borrow::{Cow, ToOwned};
21use alloc::boxed::Box;
22use alloc::vec::Vec;
23use core::fmt;
24use core::marker::PhantomData;
25use icu_provider::prelude::*;
26use yoke::Yokeable;
27use zerofrom::ZeroFrom;
28use zerovec::ule::vartuple::VarTuple;
29use zerovec::ule::vartuple::VarTupleULE;
30use zerovec::ule::AsULE;
31use zerovec::ule::EncodeAsVarULE;
32use zerovec::ule::UleError;
33use zerovec::ule::VarULE;
34use zerovec::ule::ULE;
35use zerovec::VarZeroSlice;
36
37pub mod rules;
38
#[cfg(feature = "compiled_data")]
#[derive(Debug)]
/// Baked data
///
/// Unit struct that the baked-data macros of this module are applied to. It only exists when the
/// `compiled_data` feature is enabled.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
pub struct Baked;
49
// Anonymous constant: its body is only a scope, so the glob import and the helper module below
// stay invisible to the rest of this module.
#[cfg(feature = "compiled_data")]
#[allow(unused_imports)]
const _: () = {
    use icu_plurals_data::*;
    // NOTE(review): presumably the baked macros expand to paths such as `icu::plurals` and
    // `icu::locale`; this module supplies those aliases — confirm against `icu_plurals_data`.
    mod icu {
        pub use crate as plurals;
        pub use icu_locale as locale;
    }

    // Apply the baked-data macros to `Baked`, one per data marker.
    make_provider!(Baked);
    impl_plurals_cardinal_v1!(Baked);
    impl_plurals_ordinal_v1!(Baked);
    // Plural ranges are only wired up together with the `experimental` feature.
    #[cfg(feature = "experimental")]
    impl_plurals_ranges_v1!(Baked);
};
65
icu_provider::data_marker!(
    /// Data for cardinal classification
    ///
    /// Declared with the path `plurals/cardinal/v1` and the data struct [`PluralRulesData`].
    PluralsCardinalV1,
    "plurals/cardinal/v1",
    PluralRulesData<'static>,
);

icu_provider::data_marker!(
    /// Data for ordinal classification
    ///
    /// Declared with the path `plurals/ordinal/v1` and the data struct [`PluralRulesData`].
    PluralsOrdinalV1,
    "plurals/ordinal/v1",
    PluralRulesData<'static>,
);

#[cfg(feature = "experimental")]
icu_provider::data_marker!(
    /// Data for plural range formatting
    ///
    /// Declared with the path `plurals/ranges/v1` and the data struct [`PluralRanges`].
    /// Only available with the `experimental` feature.
    PluralsRangesV1,
    "plurals/ranges/v1",
    PluralRanges<'static>
);
87
#[cfg(feature = "datagen")]
/// The latest minimum set of markers required by this component.
///
/// Always lists the cardinal and ordinal markers; the ranges marker is included only when the
/// `experimental` feature is enabled. The constant itself requires the `datagen` feature.
pub const MARKERS: &[DataMarkerInfo] = &[
    PluralsCardinalV1::INFO,
    PluralsOrdinalV1::INFO,
    #[cfg(feature = "experimental")]
    PluralsRangesV1::INFO,
];
96
/// Plural rule strings conforming to UTS 35 syntax. Includes separate fields for five of the six
/// standard plural forms. If none of the rules match, the "other" category is assumed.
///
/// Each field holds the [`Rule`] for one category, or `None` when no rule is present for it.
/// The derived `Default` leaves all five rules absent.
///
/// More information: <https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules>
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Default, Clone, PartialEq, Debug, Yokeable, ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct PluralRulesData<'data> {
    /// Rule that matches [`PluralCategory::Zero`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub zero: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::One`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub one: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::Two`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub two: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::Few`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub few: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::Many`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub many: Option<Rule<'data>>,
}

// Registers `PluralRulesData` as a data struct. The trailing attribute is handed to the macro;
// NOTE(review): presumably it gates the datagen-only part of the expansion — confirm against
// the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
    PluralRulesData<'_>,
    #[cfg(feature = "datagen")]
);
133
134#[cfg(feature = "experimental")]
135pub use ranges::*;
136
137#[cfg(feature = "experimental")]
138mod ranges {
139    use super::*;
140    use zerovec::ZeroMap;
141
/// [`PluralCategory`] but serializable as provider data.
///
/// The explicit discriminants are the byte values used when a category is stored: the type is
/// `#[repr(u8)]`, gets a ULE type named `RawPluralCategoryULE`, and is cast with `as u8` when a
/// range is packed into an [`UnvalidatedPluralRange`]. With serde, variants are renamed to
/// lowercase.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, Ord, PartialOrd)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[zerovec::make_ule(RawPluralCategoryULE)]
#[repr(u8)]
#[cfg_attr(
    any(feature = "datagen", feature = "serde"),
    serde(rename_all = "lowercase")
)]
pub enum RawPluralCategory {
    /// CLDR "other" plural category.
    Other = 0,
    /// CLDR "zero" plural category.
    Zero = 1,
    /// CLDR "one" plural category.
    One = 2,
    /// CLDR "two" plural category.
    Two = 3,
    /// CLDR "few" plural category.
    Few = 4,
    /// CLDR "many" plural category.
    Many = 5,
}
173
174    impl RawPluralCategory {
175        /// Gets the corresponding variant string of this `RawPluralCategory`.
176        #[cfg(any(feature = "datagen", feature = "serde"))]
177        const fn as_str(self) -> &'static str {
178            match self {
179                Self::Other => "other",
180                Self::Zero => "zero",
181                Self::One => "one",
182                Self::Two => "two",
183                Self::Few => "few",
184                Self::Many => "many",
185            }
186        }
187    }
188
189    impl From<RawPluralCategory> for PluralCategory {
190        fn from(value: RawPluralCategory) -> Self {
191            match value {
192                RawPluralCategory::Other => PluralCategory::Other,
193                RawPluralCategory::Zero => PluralCategory::Zero,
194                RawPluralCategory::One => PluralCategory::One,
195                RawPluralCategory::Two => PluralCategory::Two,
196                RawPluralCategory::Few => PluralCategory::Few,
197                RawPluralCategory::Many => PluralCategory::Many,
198            }
199        }
200    }
201
202    impl From<PluralCategory> for RawPluralCategory {
203        fn from(value: PluralCategory) -> Self {
204            match value {
205                PluralCategory::Zero => RawPluralCategory::Zero,
206                PluralCategory::One => RawPluralCategory::One,
207                PluralCategory::Two => RawPluralCategory::Two,
208                PluralCategory::Few => RawPluralCategory::Few,
209                PluralCategory::Many => RawPluralCategory::Many,
210                PluralCategory::Other => RawPluralCategory::Other,
211            }
212        }
213    }
214
/// A `u8` that is expected to be a plural range, but does not enforce this invariant.
///
/// Layout produced by [`UnvalidatedPluralRange::from_range`]: the high four bits hold the start
/// category and the low four bits hold the end category, each as its `u8` discriminant. Because
/// the field is public, any byte can be stored, including ones that do not decode to categories.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, Ord, PartialOrd)]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
#[zerovec::make_ule(UnvalidatedPluralRangeULE)]
pub struct UnvalidatedPluralRange(pub u8);
227
228    impl UnvalidatedPluralRange {
229        /// Creates a new `UnvalidatedPluralRange` from a category range.
230        pub fn from_range(start: RawPluralCategory, end: RawPluralCategory) -> Self {
231            let start = start as u8;
232            let end = end as u8;
233
234            debug_assert!(start < 16);
235            debug_assert!(end < 16);
236
237            let range = (start << 4) | end;
238
239            Self(range)
240        }
241    }
242
#[cfg(feature = "datagen")]
impl serde::Serialize for UnvalidatedPluralRange {
    /// Serializes the range.
    ///
    /// Human-readable formats get the string `<start>--<end>` built from the category keywords;
    /// a byte whose nibbles do not decode to categories is reported as a serialization error.
    /// Other formats get the raw byte.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        use serde::ser::Error;

        /// Displays a decoded range as `<start>--<end>`.
        struct RangeDisplay {
            start: RawPluralCategory,
            end: RawPluralCategory,
        }

        impl core::fmt::Display for RangeDisplay {
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.write_str(self.start.as_str())?;
                f.write_str("--")?;
                f.write_str(self.end.as_str())
            }
        }

        if !serializer.is_human_readable() {
            return serde::Serialize::serialize(&self.0, serializer);
        }

        // Decodes one nibble into a category, or fails with the shared error message.
        let decode = |nibble: u8| {
            RawPluralCategory::new_from_u8(nibble)
                .ok_or_else(|| S::Error::custom("invalid tag in UnvalidatedPluralRange"))
        };
        let start = decode(self.0 >> 4)?;
        let end = decode(self.0 & 0x0F)?;
        serializer.collect_str(&RangeDisplay { start, end })
    }
}
272
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for UnvalidatedPluralRange {
    /// Deserializes a range.
    ///
    /// Human-readable formats must supply a string of the form `<start>--<end>`, where both
    /// sides are CLDR plural category keywords. Other formats supply the raw packed byte, which
    /// is stored without validation.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        use serde::de::{Error, Visitor};

        /// Visitor for the human-readable `<start>--<end>` string form.
        struct HumanReadableVisitor;

        impl Visitor<'_> for HumanReadableVisitor {
            type Value = UnvalidatedPluralRange;

            fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result {
                // The separator shown here must match the `--` that `visit_str` splits on.
                formatter
                    .write_str("a plural range of the form <PluralCategory>--<PluralCategory>")
            }

            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
            where
                E: Error,
            {
                // Keywords listed in "unknown variant" errors.
                const VARIANTS: [&str; 6] = [
                    RawPluralCategory::Other.as_str(),
                    RawPluralCategory::Zero.as_str(),
                    RawPluralCategory::One.as_str(),
                    RawPluralCategory::Two.as_str(),
                    RawPluralCategory::Few.as_str(),
                    RawPluralCategory::Many.as_str(),
                ];

                let (start, end) = v
                    .split_once("--")
                    .ok_or_else(|| E::custom("expected token `--` in plural range"))?;

                let start = PluralCategory::get_for_cldr_string(start)
                    .ok_or_else(|| E::unknown_variant(start, &VARIANTS))?;
                let end = PluralCategory::get_for_cldr_string(end)
                    .ok_or_else(|| E::unknown_variant(end, &VARIANTS))?;

                Ok(UnvalidatedPluralRange::from_range(start.into(), end.into()))
            }
        }

        if deserializer.is_human_readable() {
            deserializer.deserialize_str(HumanReadableVisitor)
        } else {
            // Binary formats carry the packed byte as-is; it is intentionally not validated.
            <u8 as serde::Deserialize>::deserialize(deserializer).map(Self)
        }
    }
}
326
/// Plural categories for ranges.
///
/// Obtains the plural category of a range from the categories of its endpoints. It is required that
/// the start value must be strictly less than the end value, and both values must be strictly positive.
///
/// More information: <https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Ranges>
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Clone, PartialEq, Debug, Yokeable, ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
pub struct PluralRanges<'data> {
    /// Map between the categories of the endpoints of a range and its corresponding
    /// category.
    ///
    /// This is roughly equivalent to a `BTreeMap<(PluralCategory, PluralCategory), PluralCategory>`,
    /// where the key is `(start category, end category)`.
    ///
    /// The key is the packed byte form, see [`UnvalidatedPluralRange`].
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub ranges: ZeroMap<'data, UnvalidatedPluralRange, RawPluralCategory>,
}

// Registers `PluralRanges` as a data struct. The trailing attribute is handed to the macro;
// NOTE(review): presumably it gates the datagen-only part of the expansion — confirm against
// the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
    PluralRanges<'_>,
    #[cfg(feature = "datagen")]
);
358}
359
/// A sized packed [`PluralElements`] suitable for use in data structs.
///
/// This is a thin wrapper around a [`Cow`] of [`PluralElementsPackedULE`], so the encoded bytes
/// can be either borrowed from the data buffer or owned.
///
/// This type has the following limitations:
///
/// 1. It only supports `str`
/// 2. It does not implement [`VarULE`] so it can't be used in a [`VarZeroSlice`]
/// 3. It always serializes the [`FourBitMetadata`] as 0
///
/// Use [`PluralElementsPackedULE`] directly if you need these additional features.
#[derive(Debug, PartialEq, Yokeable, ZeroFrom)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
// The serde form is that of the single field (`transparent`), with hand-written bounds in place
// of the ones the derive would infer for `V`.
#[cfg_attr(
    feature = "serde",
    serde(
        transparent,
        bound(
            serialize = "V: serde::Serialize + PartialEq",
            deserialize = "Box<PluralElementsPackedULE<V>>: serde::Deserialize<'de>"
        )
    )
)]
pub struct PluralElementsPackedCow<'data, V: VarULE + ?Sized> {
    /// The encoded elements.
    // Deserialization goes through `deserialize_plural_elements_packed_cow`, which is defined
    // outside this part of the file.
    #[cfg_attr(
        feature = "serde",
        serde(
            borrow,
            deserialize_with = "deserialize_plural_elements_packed_cow::<_, V>"
        )
    )]
    pub elements: Cow<'data, PluralElementsPackedULE<V>>,
}
394
/// A bitpacked DST for [`PluralElements`].
///
/// Can be put in a [`Cow`] or a [`VarZeroSlice`].
///
/// The type is `#[repr(transparent)]` over its byte slice; `V` only appears as a marker for the
/// type of the encoded values.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct PluralElementsPackedULE<V: VarULE + ?Sized> {
    /// Zero-sized marker tying the bytes to the value type `V`.
    _v: PhantomData<V>,
    /// Invariant Representation:
    ///
    /// First byte: `d...mmmm`
    /// - `d` = 0 if singleton, 1 if a map
    /// - `...` = padding, should be 0
    /// - `mmmm` = [`FourBitMetadata`] for the default value
    ///
    /// If d is 0:
    /// - Remainder: the default (plural "other") value `V`
    ///
    /// If d is 1:
    /// - Second byte: L = the length of `V`
    /// - Bytes 2..(2+L): the default (plural "other") value `V`
    /// - Remainder: [`PluralElementsTupleSliceVarULE`]
    ///
    /// Byte offsets above are zero-based (the lead byte is byte 0). Since `L` is a single byte,
    /// the default value can be at most 255 bytes long in the map form.
    bytes: [u8],
}
418
419impl<V: VarULE + ?Sized> ToOwned for PluralElementsPackedULE<V> {
420    type Owned = Box<PluralElementsPackedULE<V>>;
421    fn to_owned(&self) -> Self::Owned {
422        self.to_boxed()
423    }
424}
425
426// Safety (based on the safety checklist on the VarULE trait):
427//  1. PluralElementsPackedULE does not include any uninitialized or padding bytes: it is transparent over a VarULE type ([u8])
428//  2. PluralElementsPackedULE is aligned to 1 byte: it is transparent over a VarULE type ([u8])
429//  3. The impl of `validate_bytes()` returns an error if any byte is not valid.
430//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
431//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
432//  6. `parse_bytes()` is equivalent to `validate_bytes()` followed by `from_bytes_unchecked()`
433//  7. byte equality is semantic equality
434unsafe impl<V> VarULE for PluralElementsPackedULE<V>
435where
436    V: VarULE + ?Sized,
437{
438    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
439        let unpacked_bytes =
440            Self::unpack_bytes(bytes).ok_or_else(|| UleError::length::<Self>(bytes.len()))?;
441        // The high bit of lead_byte was read in unpack_bytes.
442        // Bits 0-3 are FourBitMetadata.
443        // We expect bits 4-6 to be padding.
444        if unpacked_bytes.lead_byte & 0x70 != 0 {
445            return Err(UleError::parse::<Self>());
446        }
447        // Now validate the two variable-length slices.
448        V::validate_bytes(unpacked_bytes.v_bytes)?;
449        if let Some(specials_bytes) = unpacked_bytes.specials_bytes {
450            PluralElementsTupleSliceVarULE::<V>::validate_bytes(specials_bytes)?;
451        }
452        Ok(())
453    }
454
455    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
456        // Safety: the bytes are valid by trait invariant, and we are transparent over bytes
457        core::mem::transmute(bytes)
458    }
459}
460
461impl<V> PluralElementsPackedULE<V>
462where
463    V: VarULE + ?Sized,
464{
465    /// Casts a byte slice to a [`PluralElementsPackedULE`].
466    ///
467    /// # Safety
468    ///
469    /// The bytes must be valid according to [`PluralElementsPackedULE::validate_bytes`].
470    pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
471        // Safety: the bytes are valid by trait invariant, and we are transparent over bytes
472        core::mem::transmute(bytes)
473    }
474
475    /// Returns a tuple with:
476    /// 1. The lead byte
477    /// 2. Bytes corresponding to the default V
478    /// 3. Bytes corresponding to the specials slice, if present
479    #[inline]
480    fn unpack_bytes(bytes: &[u8]) -> Option<PluralElementsUnpackedBytes> {
481        let (lead_byte, remainder) = bytes.split_first()?;
482        if lead_byte & 0x80 == 0 {
483            Some(PluralElementsUnpackedBytes {
484                lead_byte: *lead_byte,
485                v_bytes: remainder,
486                specials_bytes: None,
487            })
488        } else {
489            let (second_byte, remainder) = remainder.split_first()?;
490            let (v_bytes, remainder) = remainder.split_at_checked(*second_byte as usize)?;
491            Some(PluralElementsUnpackedBytes {
492                lead_byte: *lead_byte,
493                v_bytes,
494                specials_bytes: Some(remainder),
495            })
496        }
497    }
498
499    /// Unpacks this structure into the default value and the optional list of specials.
500    fn as_parts(&self) -> PluralElementsUnpacked<V> {
501        // Safety: the bytes are valid by invariant
502        let unpacked_bytes = unsafe { Self::unpack_bytes(&self.bytes).unwrap_unchecked() };
503        let metadata = FourBitMetadata(unpacked_bytes.lead_byte & 0x0F);
504        // Safety: the bytes are valid by invariant
505        let default = unsafe { V::from_bytes_unchecked(unpacked_bytes.v_bytes) };
506        #[allow(clippy::manual_map)] // more explicit with the unsafe code
507        let specials = if let Some(specials_bytes) = unpacked_bytes.specials_bytes {
508            // Safety: the bytes are valid by invariant
509            Some(unsafe {
510                PluralElementsTupleSliceVarULE::<V>::from_bytes_unchecked(specials_bytes)
511            })
512        } else {
513            None
514        };
515        PluralElementsUnpacked {
516            default: (metadata, default),
517            specials,
518        }
519    }
520
521    /// Returns the value for the given [`PluralOperands`] and [`PluralRules`].
522    pub fn get<'a>(&'a self, op: PluralOperands, rules: &PluralRules) -> (FourBitMetadata, &'a V) {
523        let parts = self.as_parts();
524
525        let category = rules.category_for(op);
526
527        match parts.specials {
528            Some(specials) => {
529                if op.is_exactly_zero() {
530                    if let Some(value) = get_special(specials, PluralElementsKeys::ExplicitZero) {
531                        return value;
532                    }
533                }
534                if op.is_exactly_one() {
535                    if let Some(value) = get_special(specials, PluralElementsKeys::ExplicitOne) {
536                        return value;
537                    }
538                }
539                match category {
540                    PluralCategory::Zero => Some(PluralElementsKeys::Zero),
541                    PluralCategory::One => Some(PluralElementsKeys::One),
542                    PluralCategory::Two => Some(PluralElementsKeys::Two),
543                    PluralCategory::Few => Some(PluralElementsKeys::Few),
544                    PluralCategory::Many => Some(PluralElementsKeys::Many),
545                    PluralCategory::Other => None,
546                }
547                .and_then(|key| get_special(specials, key))
548            }
549            None => None,
550        }
551        .unwrap_or(parts.default)
552    }
553
554    /// Recovers the [`PluralElements`] corresponding to this packed structure.
555    #[cfg(feature = "datagen")]
556    pub fn decode(&self) -> PluralElements<(FourBitMetadata, &V)> {
557        PluralElements(PluralElementsInner::from_packed(self))
558    }
559
560    /// Returns the value for the default ("other") plural variant.
561    pub fn get_default(&self) -> (FourBitMetadata, &V) {
562        self.as_parts().default
563    }
564}
565
/// Keys of the non-default ("special") entries of a packed plural elements structure.
///
/// Besides the five non-"other" plural categories there are two keys for values that apply to
/// the exact operands 0 and 1. The discriminant is what gets stored: it is shifted into the
/// high nibble of a [`PluralCategoryAndMetadataPackedULE`] byte, so it has to fit in four bits.
///
/// Note that the generated ULE type is named `PluralCategoryULE`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[zerovec::make_ule(PluralCategoryULE)]
#[repr(u8)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
enum PluralElementsKeys {
    /// Entry for [`PluralCategory::Zero`].
    Zero = 0,
    /// Entry for [`PluralCategory::One`].
    One = 1,
    /// Entry for [`PluralCategory::Two`].
    Two = 2,
    /// Entry for [`PluralCategory::Few`].
    Few = 3,
    /// Entry for [`PluralCategory::Many`].
    Many = 4,
    /// Entry consulted when the operand is exactly zero.
    ExplicitZero = 5,
    /// Entry consulted when the operand is exactly one.
    ExplicitOne = 6,
}
580
581impl<T> PluralElementsInner<T>
582where
583    T: PartialEq,
584{
585    fn get_specials_tuples(&self) -> impl Iterator<Item = (PluralElementsKeys, &T)> {
586        [
587            self.zero
588                .as_ref()
589                .filter(|&p| *p != self.other)
590                .map(|s| (PluralElementsKeys::Zero, s)),
591            self.one
592                .as_ref()
593                .filter(|&p| *p != self.other)
594                .map(|s| (PluralElementsKeys::One, s)),
595            self.two
596                .as_ref()
597                .filter(|&p| *p != self.other)
598                .map(|s| (PluralElementsKeys::Two, s)),
599            self.few
600                .as_ref()
601                .filter(|&p| *p != self.other)
602                .map(|s| (PluralElementsKeys::Few, s)),
603            self.many
604                .as_ref()
605                .filter(|&p| *p != self.other)
606                .map(|s| (PluralElementsKeys::Many, s)),
607            self.explicit_zero
608                .as_ref()
609                .filter(|&p| *p != self.other)
610                .map(|s| (PluralElementsKeys::ExplicitZero, s)),
611            self.explicit_one
612                .as_ref()
613                .filter(|&p| *p != self.other)
614                .map(|s| (PluralElementsKeys::ExplicitOne, s)),
615        ]
616        .into_iter()
617        .flatten()
618    }
619}
620
/// Four bits of metadata that are stored and retrieved with the plural elements.
///
/// The wrapped byte is meant to stay in the range `0..=0x0F`: it is OR-ed into the low nibble of
/// a packed byte whose upper bits carry other information.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct FourBitMetadata(u8);

impl FourBitMetadata {
    /// Creates a [`FourBitMetadata`] if the given value fits in 4 bits.
    ///
    /// Returns `None` for any value above `0x0F`. Accepting more would let the extra bits spill
    /// into the neighbouring bits of the packed representation.
    pub fn try_from_byte(byte: u8) -> Option<Self> {
        if byte < 0x10 {
            Some(Self(byte))
        } else {
            None
        }
    }

    /// Creates a [`FourBitMetadata`] with a zero value.
    pub fn zero() -> Self {
        Self(0)
    }

    /// Gets the value out of a [`FourBitMetadata`].
    pub fn get(self) -> u8 {
        self.0
    }
}
647
/// A pair of [`PluralElementsKeys`] and [`FourBitMetadata`].
///
/// This is the decoded form of one [`PluralCategoryAndMetadataPackedULE`] byte.
#[derive(Debug, Copy, Clone)]
struct PluralCategoryAndMetadata {
    /// Which special entry this is.
    pub plural_category: PluralElementsKeys,
    /// The metadata stored alongside that entry.
    pub metadata: FourBitMetadata,
}
654
/// The two nibbles of a packed byte, split apart but not yet validated.
struct PluralCategoryAndMetadataUnpacked {
    /// High nibble, shifted down; expected to be a [`PluralElementsKeys`] discriminant.
    pub plural_category_byte: u8,
    /// Low nibble; expected to be a [`FourBitMetadata`] value.
    pub metadata_byte: u8,
}
659
/// Bitpacked struct for [`PluralCategoryAndMetadata`].
///
/// A single byte, `#[repr(transparent)]` over `u8`.
#[derive(Debug, Copy, Clone)]
#[repr(transparent)]
struct PluralCategoryAndMetadataPackedULE(
    /// Representation: `ppppmmmm`
    /// - `pppp` are a valid [`PluralElementsKeys`]
    /// - `mmmm` are a valid [`FourBitMetadata`]
    ///
    /// The valid values are determined by their respective types.
    u8,
);
671
672impl From<PluralCategoryAndMetadata> for PluralCategoryAndMetadataPackedULE {
673    fn from(value: PluralCategoryAndMetadata) -> Self {
674        let byte = ((value.plural_category as u8) << 4) | value.metadata.get();
675        debug_assert!(
676            PluralCategoryAndMetadata::try_from_unpacked(Self::unpack_byte(byte)).is_some()
677        );
678        Self(byte)
679    }
680}
681
682// # Safety
683//
684// Safety checklist for `ULE`:
685//
686// 1. The type is a single byte, not padding.
687// 2. The type is a single byte, so it has align(1).
688// 3. `validate_bytes` checks the validity of every byte.
689// 4. `validate_bytes` checks the validity of every byte.
// 5. All other methods are left with their default impl.
691// 6. The represented enums implement Eq by byte equality.
692unsafe impl ULE for PluralCategoryAndMetadataPackedULE {
693    fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
694        bytes
695            .iter()
696            .all(|byte| {
697                let unpacked = Self::unpack_byte(*byte);
698                PluralCategoryAndMetadata::try_from_unpacked(unpacked).is_some()
699            })
700            .then_some(())
701            .ok_or_else(UleError::parse::<Self>)
702    }
703}
704
705impl PluralCategoryAndMetadataPackedULE {
706    fn unpack_byte(byte: u8) -> PluralCategoryAndMetadataUnpacked {
707        let plural_category_byte = (byte & 0xF0) >> 4;
708        let metadata_byte = byte & 0x0F;
709        PluralCategoryAndMetadataUnpacked {
710            plural_category_byte,
711            metadata_byte,
712        }
713    }
714
715    fn get(self) -> PluralCategoryAndMetadata {
716        let unpacked = Self::unpack_byte(self.0);
717        // Safety: by invariant, `self.0` contains valid values for PluralCategoryAndMetadata
718        unsafe { PluralCategoryAndMetadata::try_from_unpacked(unpacked).unwrap_unchecked() }
719    }
720}
721
722impl PluralCategoryAndMetadata {
723    fn try_from_unpacked(unpacked: PluralCategoryAndMetadataUnpacked) -> Option<Self> {
724        let plural_category = PluralElementsKeys::new_from_u8(unpacked.plural_category_byte)?;
725        let metadata = FourBitMetadata::try_from_byte(unpacked.metadata_byte)?;
726        Some(Self {
727            plural_category,
728            metadata,
729        })
730    }
731}
732
733impl AsULE for PluralCategoryAndMetadata {
734    type ULE = PluralCategoryAndMetadataPackedULE;
735    #[inline]
736    fn to_unaligned(self) -> Self::ULE {
737        PluralCategoryAndMetadataPackedULE::from(self)
738    }
739    #[inline]
740    fn from_unaligned(unaligned: Self::ULE) -> Self {
741        unaligned.get()
742    }
743}
744
/// The type of the special patterns list.
///
/// Each element pairs a packed [`PluralCategoryAndMetadata`] with a variable-length value `V`.
type PluralElementsTupleSliceVarULE<V> = VarZeroSlice<VarTupleULE<PluralCategoryAndMetadata, V>>;

/// The type of the default value.
///
/// A borrowed value together with its [`FourBitMetadata`].
type PluralElementWithMetadata<'a, T> = (FourBitMetadata, &'a T);
750
/// Internal intermediate type that can be converted into a [`PluralElementsPackedULE`].
struct PluralElementsPackedBuilder<'a, T> {
    /// The default ("other") value and its metadata.
    pub default: PluralElementWithMetadata<'a, T>,
    /// The non-default entries, or `None` when there are none.
    pub specials: Option<Vec<VarTuple<PluralCategoryAndMetadata, &'a T>>>,
}
756
/// Internal unpacked and deserialized values from a [`PluralElementsPackedULE`].
struct PluralElementsUnpacked<'a, V: VarULE + ?Sized> {
    /// The default ("other") value and its metadata.
    pub default: PluralElementWithMetadata<'a, V>,
    /// The list of non-default entries; `None` for the singleton form.
    pub specials: Option<&'a PluralElementsTupleSliceVarULE<V>>,
}
762
/// Internal unpacked bytes from a [`PluralElementsPackedULE`].
///
/// The slices are only split apart; their contents have not been validated.
struct PluralElementsUnpackedBytes<'a> {
    /// The first byte, holding the singleton/map flag, padding and metadata bits.
    pub lead_byte: u8,
    /// The bytes of the default ("other") value.
    pub v_bytes: &'a [u8],
    /// The bytes of the specials list; `None` for the singleton form.
    pub specials_bytes: Option<&'a [u8]>,
}
769
770/// Helper function to access a value from [`PluralElementsTupleSliceVarULE`]
771fn get_special<V: VarULE + ?Sized>(
772    data: &PluralElementsTupleSliceVarULE<V>,
773    key: PluralElementsKeys,
774) -> Option<(FourBitMetadata, &V)> {
775    data.iter()
776        .filter_map(|ule| {
777            let PluralCategoryAndMetadata {
778                plural_category,
779                metadata,
780            } = ule.sized.get();
781            (plural_category == key).then_some((metadata, &ule.variable))
782        })
783        .next()
784}
785
786impl<T> PluralElementsInner<(FourBitMetadata, T)>
787where
788    T: PartialEq,
789{
790    fn to_packed_builder<'a, V>(&'a self) -> PluralElementsPackedBuilder<'a, T>
791    where
792        &'a T: EncodeAsVarULE<V>,
793        V: VarULE + ?Sized,
794    {
795        let specials = self
796            .get_specials_tuples()
797            .map(|(plural_category, (metadata, t))| VarTuple {
798                sized: PluralCategoryAndMetadata {
799                    plural_category,
800                    metadata: *metadata,
801                },
802                variable: t,
803            })
804            .collect::<Vec<_>>();
805        PluralElementsPackedBuilder {
806            default: (self.other.0, &self.other.1),
807            specials: if specials.is_empty() {
808                None
809            } else {
810                Some(specials)
811            },
812        }
813    }
814}
815
// Encodes `PluralElements<(FourBitMetadata, T)>` as a `PluralElementsPackedULE<V>`.
//
// Byte layout written by `encode_var_ule_write`:
//
// - Without specials: `[lead_byte, default value bytes...]`
// - With specials:    `[lead_byte | 0x80, default_len, default value bytes..., specials bytes...]`
//
// where `lead_byte` is the default element's metadata and `default_len` is the encoded
// length of the default value, which must fit in a `u8`.
//
// NOTE(review): as an `unsafe impl`, the bytes produced here must be accepted by the
// validation of `PluralElementsPackedULE<V>`; that code is outside this view — confirm
// the two stay in sync when changing either.
unsafe impl<T, V> EncodeAsVarULE<PluralElementsPackedULE<V>>
    for PluralElements<(FourBitMetadata, T)>
where
    T: PartialEq + fmt::Debug,
    for<'a> &'a T: EncodeAsVarULE<V>,
    V: VarULE + ?Sized,
{
    /// Not implemented; always panics via `unreachable!()`.
    fn encode_var_ule_as_slices<R>(&self, _cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    /// Returns the number of bytes `encode_var_ule_write` will fill.
    ///
    /// One lead byte plus the default value, plus (only when specials exist) one
    /// length byte and the encoded specials slice.
    fn encode_var_ule_len(&self) -> usize {
        let builder = self.0.to_packed_builder();
        1 + builder.default.1.encode_var_ule_len()
            + match builder.specials {
                Some(specials) => {
                    1 + EncodeAsVarULE::<PluralElementsTupleSliceVarULE<V>>::encode_var_ule_len(
                        &specials,
                    )
                }
                None => 0,
            }
    }

    /// Writes the packed representation into `dst`.
    ///
    /// # Panics
    ///
    /// Panics if specials are present and the encoded default value is longer than
    /// `u8::MAX` bytes, or if `dst` is too short to be split as required.
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        let builder = self.0.to_packed_builder();
        #[allow(clippy::unwrap_used)] // by trait invariant
        let (lead_byte, remainder) = dst.split_first_mut().unwrap();
        // The lead byte starts out as the default element's metadata.
        *lead_byte = builder.default.0.get();
        if let Some(specials) = builder.specials {
            // The high bit flags that a length byte and a specials slice follow.
            *lead_byte |= 0x80;
            #[allow(clippy::unwrap_used)] // by trait invariant
            let (second_byte, remainder) = remainder.split_first_mut().unwrap();
            // The second byte records how many bytes the default value occupies.
            *second_byte = match u8::try_from(builder.default.1.encode_var_ule_len()) {
                Ok(x) => x,
                // TODO: Inform the user more nicely that their data doesn't fit in our packed structure
                #[allow(clippy::panic)] // for now okay since it is mostly only during datagen
                Err(_) => {
                    panic!("other value too long to be packed: {self:?}")
                }
            };
            // Split the rest into the default value's bytes and the specials' bytes.
            #[allow(clippy::unwrap_used)] // for now okay since it is mostly only during datagen
            let (v_bytes, specials_bytes) = remainder
                .split_at_mut_checked(*second_byte as usize)
                .unwrap();
            builder.default.1.encode_var_ule_write(v_bytes);
            EncodeAsVarULE::<PluralElementsTupleSliceVarULE<V>>::encode_var_ule_write(
                &specials,
                specials_bytes,
            );
        } else {
            // No specials: the default value takes everything after the lead byte.
            builder.default.1.encode_var_ule_write(remainder)
        };
    }
}
872
#[cfg(feature = "datagen")]
impl<'a, V> PluralElementsInner<(FourBitMetadata, &'a V)>
where
    V: VarULE + ?Sized,
{
    /// Expands a packed value into one borrowed entry per plural key.
    ///
    /// The packed default becomes `other`. Every remaining field is looked up in the
    /// specials slice and is `None` when the slice is absent or has no entry for
    /// that key.
    fn from_packed(packed: &'a PluralElementsPackedULE<V>) -> Self {
        let parts = packed.as_parts();
        let specials = parts.specials;
        // One lookup per key against the (optional) specials slice.
        let lookup =
            |key: PluralElementsKeys| specials.and_then(|slice| get_special(slice, key));
        PluralElementsInner {
            other: parts.default,
            zero: lookup(PluralElementsKeys::Zero),
            one: lookup(PluralElementsKeys::One),
            two: lookup(PluralElementsKeys::Two),
            few: lookup(PluralElementsKeys::Few),
            many: lookup(PluralElementsKeys::Many),
            explicit_zero: lookup(PluralElementsKeys::ExplicitZero),
            explicit_one: lookup(PluralElementsKeys::ExplicitOne),
        }
    }
}
906
#[cfg(feature = "serde")]
impl<T> PluralElementsInner<(FourBitMetadata, T)> {
    /// Consumes these elements and encodes them as a boxed [`PluralElementsPackedULE`].
    fn into_packed<V>(self) -> Box<PluralElementsPackedULE<V>>
    where
        T: PartialEq + fmt::Debug,
        for<'a> &'a T: EncodeAsVarULE<V>,
        V: VarULE + ?Sized,
    {
        // The `EncodeAsVarULE` impl lives on the `PluralElements` wrapper.
        let wrapped = PluralElements(self);
        zerovec::ule::encode_varule_to_box(&wrapped)
    }
}
918
#[cfg(feature = "serde")]
impl<'de, 'data, V> serde::Deserialize<'de> for &'data PluralElementsPackedULE<V>
where
    'de: 'data,
    V: VarULE + ?Sized,
{
    /// Borrows a packed value straight out of the deserializer's byte buffer.
    ///
    /// # Errors
    ///
    /// Fails for human-readable formats, since nothing can be borrowed from them,
    /// and when the bytes are rejected by `parse_bytes`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            return Err(serde::de::Error::custom(
                "&PluralElementsPackedULE cannot be deserialized from human-readable formats",
            ));
        }
        let raw = <&[u8]>::deserialize(deserializer)?;
        match PluralElementsPackedULE::<V>::parse_bytes(raw) {
            Ok(packed) => Ok(packed),
            Err(error) => Err(serde::de::Error::custom(error)),
        }
    }
}
939
#[cfg(feature = "serde")]
impl<'de, V> serde::Deserialize<'de> for Box<PluralElementsPackedULE<V>>
where
    V: VarULE + ?Sized,
    Box<V>: serde::Deserialize<'de> + PartialEq + fmt::Debug,
{
    /// Deserializes an owned packed value.
    ///
    /// Binary formats carry the packed bytes, which are validated and copied into a
    /// box. Human-readable formats carry the expanded per-category structure, which
    /// is deserialized and then packed.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            let raw = <&[u8]>::deserialize(deserializer)?;
            return match PluralElementsPackedULE::<V>::parse_bytes(raw) {
                Ok(borrowed) => Ok(borrowed.to_owned()),
                Err(error) => Err(serde::de::Error::custom(error)),
            };
        }
        let expanded: PluralElementsInner<(FourBitMetadata, Box<V>)> =
            PluralElementsInner::deserialize(deserializer)?;
        Ok(expanded.into_packed())
    }
}
962
#[cfg(feature = "datagen")]
impl<V> serde::Serialize for PluralElementsPackedULE<V>
where
    V: PartialEq + serde::Serialize + VarULE + ?Sized,
{
    /// Serializes the packed value.
    ///
    /// Binary formats receive the packed bytes unchanged; human-readable formats
    /// receive the expanded per-category structure.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(self.as_bytes());
        }
        let expanded: PluralElementsInner<(FourBitMetadata, &V)> =
            PluralElementsInner::from_packed(self);
        expanded.serialize(serializer)
    }
}
981
#[cfg(feature = "datagen")]
impl<'a, V> databake::Bake for &'a PluralElementsPackedULE<V>
where
    &'a V: databake::Bake,
    V: VarULE + ?Sized,
{
    /// Emits an expression that rebuilds this value from its baked byte slice.
    fn bake(&self, ctx: &databake::CrateEnv) -> databake::TokenStream {
        ctx.insert("icu_plurals");
        let packed: &PluralElementsPackedULE<V> = self;
        let raw = (&packed.bytes).bake(ctx);
        databake::quote! {
            // Safety: the baked bytes are exactly the bytes of the value being baked.
            unsafe { icu_plurals::provider::PluralElementsPackedULE::from_bytes_unchecked(#raw) }
        }
    }
}
997
#[cfg(feature = "datagen")]
impl<'a, V> databake::BakeSize for &'a PluralElementsPackedULE<V>
where
    &'a V: databake::Bake,
    V: VarULE + ?Sized,
{
    /// Reports the length in bytes of the packed data.
    fn borrows_size(&self) -> usize {
        let raw: &[u8] = &self.bytes;
        raw.len()
    }
}
1008
/// Helper function to properly deserialize a `Cow<PluralElementsPackedULE<V>>`
///
/// Human-readable formats produce a `Cow::Owned` value; all other formats produce a
/// `Cow::Borrowed` value that points into the deserializer's input.
///
/// Due to <https://github.com/rust-lang/rust/issues/130180>, you may need to qualify
/// `V` when invoking this, like so:
///
/// `#[serde(deserialize_with = "deserialize_plural_elements_packed_cow::<_, str>")]`
///
/// See <https://github.com/unicode-org/icu4x/pull/1556>
#[cfg(feature = "serde")]
fn deserialize_plural_elements_packed_cow<'de, 'data, D, V>(
    deserializer: D,
) -> Result<Cow<'data, PluralElementsPackedULE<V>>, D::Error>
where
    'de: 'data,
    D: serde::Deserializer<'de>,
    V: VarULE + ?Sized,
    Box<PluralElementsPackedULE<V>>: serde::Deserialize<'de>,
{
    use serde::Deserialize;
    if !deserializer.is_human_readable() {
        // Binary input: borrow the packed bytes for zero-copy access.
        let borrowed = <&'de PluralElementsPackedULE<V>>::deserialize(deserializer)?;
        return Ok(Cow::Borrowed(borrowed));
    }
    // Human-readable input has to be packed into a fresh allocation.
    let owned = Box::<PluralElementsPackedULE<V>>::deserialize(deserializer)?;
    Ok(Cow::Owned(owned))
}
1036
1037// Need a manual impl because the derive(Clone) impl bounds are wrong
1038impl<V> Clone for PluralElementsPackedCow<'_, V>
1039where
1040    V: VarULE + ?Sized,
1041{
1042    fn clone(&self) -> Self {
1043        Self {
1044            elements: self.elements.clone(),
1045        }
1046    }
1047}
1048
1049impl<T, V> From<PluralElements<T>> for PluralElementsPackedCow<'static, V>
1050where
1051    V: VarULE + ?Sized,
1052    T: PartialEq + fmt::Debug,
1053    for<'a> &'a T: EncodeAsVarULE<V>,
1054{
1055    fn from(value: PluralElements<T>) -> Self {
1056        let elements =
1057            zerovec::ule::encode_varule_to_box(&value.map(|s| (FourBitMetadata::zero(), s)));
1058        Self {
1059            elements: Cow::Owned(elements),
1060        }
1061    }
1062}
1063
1064impl<V> PluralElementsPackedCow<'_, V>
1065where
1066    V: VarULE + ?Sized,
1067{
1068    /// Returns the value for the given [`PluralOperands`] and [`PluralRules`].
1069    pub fn get<'a>(&'a self, op: PluralOperands, rules: &PluralRules) -> &'a V {
1070        self.elements.get(op, rules).1
1071    }
1072}
1073
#[test]
fn test_serde_singleton_roundtrip() {
    // A value that carries only the default ("other") element.
    let elements = PluralElements::new((FourBitMetadata::zero(), "abc"));
    let packed = zerovec::ule::encode_varule_to_box(&elements);

    // Binary form: the packed bytes behind a postcard length prefix.
    let binary = postcard::to_allocvec(&packed).unwrap();
    assert_eq!(
        binary,
        &[
            4,    // Postcard header
            0x00, // Discriminant
            b'a', b'b', b'c', // String
        ]
    );

    // Owned deserialization from binary.
    let from_binary_owned: Box<PluralElementsPackedULE<str>> =
        postcard::from_bytes(&binary).unwrap();
    assert_eq!(packed, from_binary_owned);

    // Borrowed deserialization from binary.
    let from_binary_borrowed: &PluralElementsPackedULE<str> =
        postcard::from_bytes(&binary).unwrap();
    assert_eq!(&*packed, from_binary_borrowed);

    // The Cow wrapper must borrow when reading binary data.
    let from_binary_cow: PluralElementsPackedCow<str> = postcard::from_bytes(&binary).unwrap();
    assert_eq!(&*packed, &*from_binary_cow.elements);
    assert!(matches!(from_binary_cow.elements, Cow::Borrowed(_)));

    // Human-readable form round-trips through JSON.
    let json = serde_json::to_string(&packed).unwrap();
    let from_json: Box<PluralElementsPackedULE<str>> = serde_json::from_str(&json).unwrap();
    assert_eq!(packed, from_json);
}
1105
#[test]
fn test_serde_nonsingleton_roundtrip() {
    // A value with a default element plus one special ("one") element.
    let elements = PluralElements::new((FourBitMetadata::zero(), "abc"))
        .with_one_value(Some((FourBitMetadata::zero(), "defg")));
    let packed = zerovec::ule::encode_varule_to_box(&elements);

    // Binary form: the packed bytes behind a postcard length prefix.
    let binary = postcard::to_allocvec(&packed).unwrap();
    assert_eq!(
        binary,
        &[
            12,   // Postcard header
            0x80, // Discriminant
            3, b'a', b'b', b'c', // String of length 3
            1, 0, // VarZeroVec of length 1
            0x10, b'd', b'e', b'f', b'g' // Plural category 1 and string "defg"
        ]
    );

    // Owned deserialization from binary.
    let from_binary_owned: Box<PluralElementsPackedULE<str>> =
        postcard::from_bytes(&binary).unwrap();
    assert_eq!(packed, from_binary_owned);

    // Borrowed deserialization from binary.
    let from_binary_borrowed: &PluralElementsPackedULE<str> =
        postcard::from_bytes(&binary).unwrap();
    assert_eq!(&*packed, from_binary_borrowed);

    // Human-readable form round-trips through JSON.
    let json = serde_json::to_string(&packed).unwrap();
    let from_json: Box<PluralElementsPackedULE<str>> = serde_json::from_str(&json).unwrap();
    assert_eq!(packed, from_json);
}