icu_plurals/
provider.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5// Provider structs must be stable
6#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
7
8//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
9//!
10//! <div class="stab unstable">
11//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
12//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
13//! to be stable, their Rust representation might not be. Use with caution.
14//! </div>
15//!
16//! Read more about data providers: [`icu_provider`]
17
18use crate::provider::rules::runtime::ast::Rule;
19use crate::{PluralCategory, PluralElements, PluralElementsInner, PluralOperands, PluralRules};
20use alloc::borrow::{Cow, ToOwned};
21use alloc::boxed::Box;
22use alloc::vec::Vec;
23use core::fmt;
24use core::marker::PhantomData;
25use icu_provider::prelude::*;
26use yoke::Yokeable;
27use zerofrom::ZeroFrom;
28use zerovec::ule::vartuple::VarTuple;
29use zerovec::ule::vartuple::VarTupleULE;
30use zerovec::ule::AsULE;
31use zerovec::ule::EncodeAsVarULE;
32use zerovec::ule::UleError;
33use zerovec::ule::VarULE;
34use zerovec::ule::ULE;
35use zerovec::VarZeroSlice;
36
37pub mod rules;
38
#[cfg(feature = "compiled_data")]
#[derive(Debug)]
/// Baked data
///
/// Unit struct that is the target of the `make_provider!` and `impl_plurals_*!` macro
/// invocations in the `const _` block that follows this definition. Only compiled when the
/// `compiled_data` feature is enabled.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
pub struct Baked;
49
#[cfg(feature = "compiled_data")]
#[allow(unused_imports)]
// The anonymous `const _` block scopes the glob import and the helper `icu` module so that
// neither leaks into the surrounding module.
const _: () = {
    use icu_plurals_data::*;
    // Re-exports this crate as `icu::plurals` and `icu_locale` as `icu::locale`
    // (presumably the paths the macros below expand to — confirm against `icu_plurals_data`).
    mod icu {
        pub use crate as plurals;
        pub use icu_locale as locale;
    }

    make_provider!(Baked);
    impl_plurals_cardinal_v1!(Baked);
    impl_plurals_ordinal_v1!(Baked);
    // Plural range data is only wired up when the `experimental` feature is enabled.
    #[cfg(feature = "experimental")]
    impl_plurals_ranges_v1!(Baked);
};
65
// Marker `PluralsCardinalV1`: path "plurals/cardinal/v1", payload `PluralRulesData`.
icu_provider::data_marker!(
    /// Data for cardinal classification
    PluralsCardinalV1,
    "plurals/cardinal/v1",
    PluralRulesData<'static>,
);

// Marker `PluralsOrdinalV1`: path "plurals/ordinal/v1", same payload type as the cardinal marker.
icu_provider::data_marker!(
    /// Data for ordinal classification
    PluralsOrdinalV1,
    "plurals/ordinal/v1",
    PluralRulesData<'static>,
);

// Marker `PluralsRangesV1`: path "plurals/ranges/v1", payload `PluralRanges`.
// Only defined when the `experimental` feature is enabled.
#[cfg(feature = "experimental")]
icu_provider::data_marker!(
    /// Data for plural range formatting
    PluralsRangesV1,
    "plurals/ranges/v1",
    PluralRanges<'static>
);
87
#[cfg(feature = "datagen")]
/// The latest minimum set of markers required by this component.
///
/// Always lists the cardinal and ordinal markers; the plural ranges marker is appended only
/// when the `experimental` feature is enabled. The constant itself exists only with the
/// `datagen` feature.
pub const MARKERS: &[DataMarkerInfo] = &[
    PluralsCardinalV1::INFO,
    PluralsOrdinalV1::INFO,
    #[cfg(feature = "experimental")]
    PluralsRangesV1::INFO,
];
96
/// Plural rule strings conforming to UTS 35 syntax. Includes separate fields for five of the six
/// standard plural forms. If none of the rules match, the "other" category is assumed.
///
/// Every field is optional; the derived [`Default`] therefore yields a value with no rules at all.
///
/// More information: <https://unicode.org/reports/tr35/tr35-numbers.html#Language_Plural_Rules>
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Default, Clone, PartialEq, Debug, Yokeable, ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct PluralRulesData<'data> {
    /// Rule that matches [`PluralCategory::Zero`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub zero: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::One`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub one: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::Two`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub two: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::Few`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub few: Option<Rule<'data>>,
    /// Rule that matches [`PluralCategory::Many`], or `None` if not present.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub many: Option<Rule<'data>>,
}

// Invokes `icu_provider::data_struct!` for `PluralRulesData`, passing a `datagen` feature gate
// as the trailing attribute argument (what the macro does with it is defined in `icu_provider`).
icu_provider::data_struct!(
    PluralRulesData<'_>,
    #[cfg(feature = "datagen")]
);
133
134#[cfg(feature = "experimental")]
135pub use ranges::*;
136
137#[cfg(feature = "experimental")]
138mod ranges {
139    use super::*;
140    use zerovec::ZeroMap;
141
    /// [`PluralCategory`] but serializable as provider data.
    ///
    /// The explicit `u8` discriminants are the values that
    /// [`UnvalidatedPluralRange::from_range`] packs, one category per nibble. The serde names
    /// of the variants are lowercased (`"other"`, `"zero"`, ...).
    ///
    /// <div class="stab unstable">
    /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
    /// including in SemVer minor releases. While the serde representation of data structs is guaranteed
    /// to be stable, their Rust representation might not be. Use with caution.
    /// </div>
    #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, Ord, PartialOrd)]
    #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
    #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
    #[zerovec::make_ule(RawPluralCategoryULE)]
    #[repr(u8)]
    #[cfg_attr(
        any(feature = "datagen", feature = "serde"),
        serde(rename_all = "lowercase")
    )]
    pub enum RawPluralCategory {
        /// CLDR "other" plural category.
        Other = 0,
        /// CLDR "zero" plural category.
        Zero = 1,
        /// CLDR "one" plural category.
        One = 2,
        /// CLDR "two" plural category.
        Two = 3,
        /// CLDR "few" plural category.
        Few = 4,
        /// CLDR "many" plural category.
        Many = 5,
    }
173
174    impl RawPluralCategory {
175        /// Gets the corresponding variant string of this `RawPluralCategory`.
176        #[cfg(any(feature = "datagen", feature = "serde"))]
177        const fn as_str(self) -> &'static str {
178            match self {
179                Self::Other => "other",
180                Self::Zero => "zero",
181                Self::One => "one",
182                Self::Two => "two",
183                Self::Few => "few",
184                Self::Many => "many",
185            }
186        }
187    }
188
189    impl From<RawPluralCategory> for PluralCategory {
190        fn from(value: RawPluralCategory) -> Self {
191            match value {
192                RawPluralCategory::Other => PluralCategory::Other,
193                RawPluralCategory::Zero => PluralCategory::Zero,
194                RawPluralCategory::One => PluralCategory::One,
195                RawPluralCategory::Two => PluralCategory::Two,
196                RawPluralCategory::Few => PluralCategory::Few,
197                RawPluralCategory::Many => PluralCategory::Many,
198            }
199        }
200    }
201
202    impl From<PluralCategory> for RawPluralCategory {
203        fn from(value: PluralCategory) -> Self {
204            match value {
205                PluralCategory::Zero => RawPluralCategory::Zero,
206                PluralCategory::One => RawPluralCategory::One,
207                PluralCategory::Two => RawPluralCategory::Two,
208                PluralCategory::Few => RawPluralCategory::Few,
209                PluralCategory::Many => RawPluralCategory::Many,
210                PluralCategory::Other => RawPluralCategory::Other,
211            }
212        }
213    }
214
    /// A `u8` that is expected to be a plural range, but does not enforce this invariant.
    ///
    /// [`Self::from_range`] stores the start category in the high four bits and the end
    /// category in the low four bits. Because the field is public, any byte can be wrapped.
    ///
    /// <div class="stab unstable">
    /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
    /// including in SemVer minor releases. While the serde representation of data structs is guaranteed
    /// to be stable, their Rust representation might not be. Use with caution.
    /// </div>
    #[derive(Debug, PartialEq, Eq, Clone, Copy, Hash, Ord, PartialOrd)]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
    #[zerovec::make_ule(UnvalidatedPluralRangeULE)]
    pub struct UnvalidatedPluralRange(pub u8);
227
228    impl UnvalidatedPluralRange {
229        /// Creates a new `UnvalidatedPluralRange` from a category range.
230        pub fn from_range(start: RawPluralCategory, end: RawPluralCategory) -> Self {
231            let start = start as u8;
232            let end = end as u8;
233
234            debug_assert!(start < 16);
235            debug_assert!(end < 16);
236
237            let range = (start << 4) | end;
238
239            Self(range)
240        }
241    }
242
243    #[cfg(feature = "datagen")]
244    impl serde::Serialize for UnvalidatedPluralRange {
245        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
246        where
247            S: serde::Serializer,
248        {
249            use serde::ser::Error;
250
251            struct PrettyPrinter(RawPluralCategory, RawPluralCategory);
252
253            impl core::fmt::Display for PrettyPrinter {
254                fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
255                    f.write_str(self.0.as_str())?;
256                    f.write_str("--")?;
257                    f.write_str(self.1.as_str())
258                }
259            }
260
261            if serializer.is_human_readable() {
262                let start = RawPluralCategory::new_from_u8(self.0 >> 4)
263                    .ok_or_else(|| S::Error::custom("invalid tag in UnvalidatedPluralRange"))?;
264                let end = RawPluralCategory::new_from_u8(self.0 & 0x0F)
265                    .ok_or_else(|| S::Error::custom("invalid tag in UnvalidatedPluralRange"))?;
266                serializer.collect_str(&PrettyPrinter(start, end))
267            } else {
268                self.0.serialize(serializer)
269            }
270        }
271    }
272
273    #[cfg(feature = "serde")]
274    impl<'de> serde::Deserialize<'de> for UnvalidatedPluralRange {
275        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
276        where
277            D: serde::Deserializer<'de>,
278        {
279            use serde::de::{Error, Visitor};
280
281            struct HumanReadableVisitor;
282
283            impl Visitor<'_> for HumanReadableVisitor {
284                type Value = UnvalidatedPluralRange;
285
286                fn expecting(&self, formatter: &mut alloc::fmt::Formatter) -> alloc::fmt::Result {
287                    write!(
288                        formatter,
289                        "a plural range of the form <PluralCategory>-<PluralCategory>",
290                    )
291                }
292
293                fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
294                where
295                    E: Error,
296                {
297                    const VARIANTS: [&str; 6] = [
298                        RawPluralCategory::Other.as_str(),
299                        RawPluralCategory::Zero.as_str(),
300                        RawPluralCategory::One.as_str(),
301                        RawPluralCategory::Two.as_str(),
302                        RawPluralCategory::Few.as_str(),
303                        RawPluralCategory::Many.as_str(),
304                    ];
305
306                    let (start, end) = v
307                        .split_once("--")
308                        .ok_or_else(|| E::custom("expected token `--` in plural range"))?;
309
310                    let start = PluralCategory::get_for_cldr_string(start)
311                        .ok_or_else(|| E::unknown_variant(start, &VARIANTS))?;
312                    let end = PluralCategory::get_for_cldr_string(end)
313                        .ok_or_else(|| E::unknown_variant(end, &VARIANTS))?;
314
315                    Ok(UnvalidatedPluralRange::from_range(start.into(), end.into()))
316                }
317            }
318
319            if deserializer.is_human_readable() {
320                deserializer.deserialize_str(HumanReadableVisitor)
321            } else {
322                Ok(Self(<u8>::deserialize(deserializer)?))
323            }
324        }
325    }
326
    /// Plural categories for ranges.
    ///
    /// Obtains the plural category of a range from the categories of its endpoints. It is required that
    /// the start value must be strictly less than the end value, and both values must be strictly positive.
    ///
    /// More information: <https://unicode.org/reports/tr35/tr35-numbers.html#Plural_Ranges>
    ///
    /// <div class="stab unstable">
    /// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
    /// including in SemVer minor releases. While the serde representation of data structs is guaranteed
    /// to be stable, their Rust representation might not be. Use with caution.
    /// </div>
    #[derive(Clone, PartialEq, Debug, Yokeable, ZeroFrom)]
    #[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
    #[cfg_attr(feature = "serde", derive(serde::Deserialize))]
    #[yoke(prove_covariance_manually)]
    pub struct PluralRanges<'data> {
        /// Map between the categories of the endpoints of a range and its corresponding
        /// category.
        ///
        /// This is roughly equivalent to a `BTreeMap<(PluralCategory, PluralCategory), PluralCategory>`,
        /// where the key is `(start category, end category)`.
        ///
        /// The key is stored as a single packed byte ([`UnvalidatedPluralRange`]) and the
        /// value as a [`RawPluralCategory`].
        #[cfg_attr(feature = "serde", serde(borrow))]
        pub ranges: ZeroMap<'data, UnvalidatedPluralRange, RawPluralCategory>,
    }

    // Invokes `icu_provider::data_struct!` for `PluralRanges`, passing a `datagen` feature gate
    // as the trailing attribute argument (what the macro does with it is defined in `icu_provider`).
    icu_provider::data_struct!(
        PluralRanges<'_>,
        #[cfg(feature = "datagen")]
    );
358}
359
/// A sized packed [`PluralElements`] suitable for use in data structs.
///
/// This type has the following limitations:
///
/// 1. It only supports `str`
/// 2. It does not implement [`VarULE`] so it can't be used in a [`VarZeroSlice`]
/// 3. It always serializes the [`FourBitMetadata`] as 0
///
/// Use [`PluralElementsPackedULE`] directly if you need these additional features.
// NOTE(review): the struct is generic over `V: VarULE + ?Sized`, so limitation 1 above may be
// stale — confirm against the serde/databake impls, which are not in this part of the file.
#[derive(Debug, PartialEq, Yokeable, ZeroFrom)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_plurals::provider))]
#[cfg_attr(
    feature = "serde",
    serde(
        transparent,
        bound(
            serialize = "V: serde::Serialize + PartialEq",
            deserialize = "Box<PluralElementsPackedULE<V>>: serde::Deserialize<'de>"
        )
    )
)]
pub struct PluralElementsPackedCow<'data, V: VarULE + ?Sized> {
    /// The encoded elements.
    ///
    /// Either borrowed from the data buffer or owned as a `Box<PluralElementsPackedULE<V>>`
    /// (the `ToOwned::Owned` type of [`PluralElementsPackedULE`]).
    #[cfg_attr(
        feature = "serde",
        serde(
            borrow,
            deserialize_with = "deserialize_plural_elements_packed_cow::<_, V>"
        )
    )]
    pub elements: Cow<'data, PluralElementsPackedULE<V>>,
}
394
/// A bitpacked DST for [`PluralElements`].
///
/// Can be put in a [`Cow`] or a [`VarZeroSlice`].
///
/// The type is `#[repr(transparent)]` over its byte slice; see the `bytes` field for the
/// exact layout.
#[derive(PartialEq, Eq)]
#[repr(transparent)]
pub struct PluralElementsPackedULE<V: VarULE + ?Sized> {
    // Zero-sized marker recording the value type `V` that the bytes encode.
    _v: PhantomData<V>,
    /// Invariant Representation:
    ///
    /// First byte: `d...mmmm`
    /// - `d` = 0 if singleton, 1 if a map
    /// - `...` = padding, should be 0
    /// - `mmmm` = [`FourBitMetadata`] for the default value
    ///
    /// If d is 0:
    /// - Remainder: the default (plural "other") value `V`
    ///
    /// If d is 1:
    /// - Second byte: L = the length of `V`
    /// - Bytes 2..(2+L): the default (plural "other") value `V`
    /// - Remainder: [`PluralElementsTupleSliceVarULE`]
    ///
    /// Since L is a single byte, the default value can be at most 255 bytes long when a
    /// map is present.
    bytes: [u8],
}
418
419impl<V: VarULE + fmt::Debug + ?Sized> fmt::Debug for PluralElementsPackedULE<V> {
420    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
421        let unpacked = self.as_parts();
422        f.debug_struct("PluralElementsPackedULE")
423            .field("parts", &unpacked)
424            .field("bytes", &&self.bytes)
425            .finish()
426    }
427}
428
// `ToOwned` is what allows `Cow<'_, PluralElementsPackedULE<V>>` (see `PluralElementsPackedCow`);
// the owned form is a `Box` of the same unsized type.
impl<V: VarULE + ?Sized> ToOwned for PluralElementsPackedULE<V> {
    type Owned = Box<PluralElementsPackedULE<V>>;
    fn to_owned(&self) -> Self::Owned {
        // Delegates to `to_boxed()` (presumably the `VarULE` provided method — confirm).
        self.to_boxed()
    }
}
435
436// Safety (based on the safety checklist on the VarULE trait):
437//  1. PluralElementsPackedULE does not include any uninitialized or padding bytes: it is transparent over a VarULE type ([u8])
438//  2. PluralElementsPackedULE is aligned to 1 byte: it is transparent over a VarULE type ([u8])
439//  3. The impl of `validate_bytes()` returns an error if any byte is not valid.
440//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its entirety
441//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data.
442//  6. `parse_bytes()` is equivalent to `validate_bytes()` followed by `from_bytes_unchecked()`
443//  7. byte equality is semantic equality
444unsafe impl<V> VarULE for PluralElementsPackedULE<V>
445where
446    V: VarULE + ?Sized,
447{
448    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
449        let unpacked_bytes =
450            Self::unpack_bytes(bytes).ok_or_else(|| UleError::length::<Self>(bytes.len()))?;
451        // The high bit of lead_byte was read in unpack_bytes.
452        // Bits 0-3 are FourBitMetadata.
453        // We expect bits 4-6 to be padding.
454        if unpacked_bytes.lead_byte & 0x70 != 0 {
455            return Err(UleError::parse::<Self>());
456        }
457        // Now validate the two variable-length slices.
458        V::validate_bytes(unpacked_bytes.v_bytes)?;
459        if let Some(specials_bytes) = unpacked_bytes.specials_bytes {
460            PluralElementsTupleSliceVarULE::<V>::validate_bytes(specials_bytes)?;
461        }
462        Ok(())
463    }
464
465    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
466        // Safety: the bytes are valid by trait invariant, and we are transparent over bytes
467        core::mem::transmute(bytes)
468    }
469}
470
471impl<V> PluralElementsPackedULE<V>
472where
473    V: VarULE + ?Sized,
474{
    /// Casts a byte slice to a [`PluralElementsPackedULE`].
    ///
    /// This is a `const` counterpart of the [`VarULE`] trait method of the same name.
    ///
    /// # Safety
    ///
    /// The bytes must be valid according to [`PluralElementsPackedULE::validate_bytes`].
    pub const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        // Safety: the caller guarantees the bytes are valid (see `# Safety` above), and this
        // type is `#[repr(transparent)]` over `[u8]`.
        core::mem::transmute(bytes)
    }
484
485    /// Returns a tuple with:
486    /// 1. The lead byte
487    /// 2. Bytes corresponding to the default V
488    /// 3. Bytes corresponding to the specials slice, if present
489    #[inline]
490    fn unpack_bytes(bytes: &[u8]) -> Option<PluralElementsUnpackedBytes<'_>> {
491        let (lead_byte, remainder) = bytes.split_first()?;
492        if lead_byte & 0x80 == 0 {
493            Some(PluralElementsUnpackedBytes {
494                lead_byte: *lead_byte,
495                v_bytes: remainder,
496                specials_bytes: None,
497            })
498        } else {
499            let (second_byte, remainder) = remainder.split_first()?;
500            let (v_bytes, remainder) = remainder.split_at_checked(*second_byte as usize)?;
501            Some(PluralElementsUnpackedBytes {
502                lead_byte: *lead_byte,
503                v_bytes,
504                specials_bytes: Some(remainder),
505            })
506        }
507    }
508
    /// Unpacks this structure into the default value and the optional list of specials.
    ///
    /// The default's metadata is read from the low four bits of the lead byte. Nothing is
    /// validated here: the stored bytes are trusted to satisfy the type's invariant, which
    /// is what makes the unchecked operations below sound.
    fn as_parts(&self) -> PluralElementsUnpacked<'_, V> {
        // Safety: the bytes are valid by invariant
        let unpacked_bytes = unsafe { Self::unpack_bytes(&self.bytes).unwrap_unchecked() };
        let metadata = FourBitMetadata(unpacked_bytes.lead_byte & 0x0F);
        // Safety: the bytes are valid by invariant
        let default = unsafe { V::from_bytes_unchecked(unpacked_bytes.v_bytes) };
        #[expect(clippy::manual_map)] // more explicit with the unsafe code
        let specials = if let Some(specials_bytes) = unpacked_bytes.specials_bytes {
            // Safety: the bytes are valid by invariant
            Some(unsafe {
                PluralElementsTupleSliceVarULE::<V>::from_bytes_unchecked(specials_bytes)
            })
        } else {
            None
        };
        PluralElementsUnpacked {
            default: (metadata, default),
            specials,
        }
    }
530
531    /// Returns the value for the given [`PluralOperands`] and [`PluralRules`].
532    pub fn get<'a>(&'a self, op: PluralOperands, rules: &PluralRules) -> (FourBitMetadata, &'a V) {
533        let parts = self.as_parts();
534
535        let category = rules.category_for(op);
536
537        match parts.specials {
538            Some(specials) => {
539                if op.is_exactly_zero() {
540                    if let Some(value) = get_special(specials, PluralElementsKeys::ExplicitZero) {
541                        return value;
542                    }
543                }
544                if op.is_exactly_one() {
545                    if let Some(value) = get_special(specials, PluralElementsKeys::ExplicitOne) {
546                        return value;
547                    }
548                }
549                match category {
550                    PluralCategory::Zero => Some(PluralElementsKeys::Zero),
551                    PluralCategory::One => Some(PluralElementsKeys::One),
552                    PluralCategory::Two => Some(PluralElementsKeys::Two),
553                    PluralCategory::Few => Some(PluralElementsKeys::Few),
554                    PluralCategory::Many => Some(PluralElementsKeys::Many),
555                    PluralCategory::Other => None,
556                }
557                .and_then(|key| get_special(specials, key))
558            }
559            None => None,
560        }
561        .unwrap_or(parts.default)
562    }
563
    /// Recovers the [`PluralElements`] corresponding to this packed structure.
    ///
    /// Only available with the `datagen` feature. The work is delegated to
    /// `PluralElementsInner::from_packed`.
    #[cfg(feature = "datagen")]
    pub fn decode(&self) -> PluralElements<(FourBitMetadata, &V)> {
        PluralElements(PluralElementsInner::from_packed(self))
    }
569
    /// Returns the value for the default ("other") plural variant.
    ///
    /// No plural rules are consulted; the value is returned together with the metadata
    /// stored in the lead byte.
    pub fn get_default(&self) -> (FourBitMetadata, &V) {
        self.as_parts().default
    }
574}
575
/// Keys identifying the non-default ("special") entries of packed plural elements.
///
/// There is no key for the "other" category, which is stored as the default value instead.
/// Besides the five remaining categories, there are keys for the explicit `0` and `1` cases.
/// The discriminants are stored in the high nibble of `PluralCategoryAndMetadataPackedULE`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[zerovec::make_ule(PluralCategoryULE)]
#[repr(u8)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
enum PluralElementsKeys {
    /// Entry for [`PluralCategory::Zero`].
    Zero = 0,
    /// Entry for [`PluralCategory::One`].
    One = 1,
    /// Entry for [`PluralCategory::Two`].
    Two = 2,
    /// Entry for [`PluralCategory::Few`].
    Few = 3,
    /// Entry for [`PluralCategory::Many`].
    Many = 4,
    /// Entry used when the operands are exactly zero.
    ExplicitZero = 5,
    /// Entry used when the operands are exactly one.
    ExplicitOne = 6,
}
590
591impl<T> PluralElementsInner<T>
592where
593    T: PartialEq,
594{
595    fn get_specials_tuples(&self) -> impl Iterator<Item = (PluralElementsKeys, &T)> {
596        [
597            self.zero
598                .as_ref()
599                .filter(|&p| *p != self.other)
600                .map(|s| (PluralElementsKeys::Zero, s)),
601            self.one
602                .as_ref()
603                .filter(|&p| *p != self.other)
604                .map(|s| (PluralElementsKeys::One, s)),
605            self.two
606                .as_ref()
607                .filter(|&p| *p != self.other)
608                .map(|s| (PluralElementsKeys::Two, s)),
609            self.few
610                .as_ref()
611                .filter(|&p| *p != self.other)
612                .map(|s| (PluralElementsKeys::Few, s)),
613            self.many
614                .as_ref()
615                .filter(|&p| *p != self.other)
616                .map(|s| (PluralElementsKeys::Many, s)),
617            self.explicit_zero
618                .as_ref()
619                .filter(|&p| *p != self.other)
620                .map(|s| (PluralElementsKeys::ExplicitZero, s)),
621            self.explicit_one
622                .as_ref()
623                .filter(|&p| *p != self.other)
624                .map(|s| (PluralElementsKeys::ExplicitOne, s)),
625        ]
626        .into_iter()
627        .flatten()
628    }
629}
630
/// Four bits of metadata that are stored and retrieved with the plural elements.
///
/// Values built through [`FourBitMetadata::try_from_byte`] or [`FourBitMetadata::zero`] are
/// always in the range `0..=0x0F`, so they can be OR-ed into the low nibble of a packed byte
/// without disturbing the high nibble.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[repr(transparent)]
pub struct FourBitMetadata(u8);

impl FourBitMetadata {
    /// Creates a [`FourBitMetadata`] if the given value fits in 4 bits.
    ///
    /// Returns `None` for any `byte` greater than `0x0F`.
    pub fn try_from_byte(byte: u8) -> Option<Self> {
        // Four bits hold at most 0x0F. Anything larger would leak into the high nibble
        // of the byte the metadata is packed into.
        if byte <= 0x0F {
            Some(Self(byte))
        } else {
            None
        }
    }

    /// Creates a [`FourBitMetadata`] with a zero value.
    pub fn zero() -> Self {
        Self(0)
    }

    /// Gets the value out of a [`FourBitMetadata`].
    pub fn get(self) -> u8 {
        self.0
    }
}
657
/// A pair of [`PluralElementsKeys`] and [`FourBitMetadata`].
///
/// This is the aligned (`AsULE`) form; it packs into one byte as
/// `PluralCategoryAndMetadataPackedULE`.
#[derive(Debug, Copy, Clone)]
struct PluralCategoryAndMetadata {
    /// Which special entry this pair describes.
    pub plural_category: PluralElementsKeys,
    /// The metadata stored alongside that entry.
    pub metadata: FourBitMetadata,
}
664
/// The two nibbles of a packed byte, split apart but not yet validated.
///
/// Produced by `PluralCategoryAndMetadataPackedULE::unpack_byte` and consumed by
/// `PluralCategoryAndMetadata::try_from_unpacked`.
struct PluralCategoryAndMetadataUnpacked {
    /// High nibble of the packed byte, shifted down to the range `0..=15`.
    pub plural_category_byte: u8,
    /// Low nibble of the packed byte.
    pub metadata_byte: u8,
}
669
/// Bitpacked struct for [`PluralCategoryAndMetadata`].
///
/// A single byte; whether a given byte is valid is decided by this type's
/// `ULE::validate_bytes` implementation.
#[derive(Debug, Copy, Clone)]
#[repr(transparent)]
struct PluralCategoryAndMetadataPackedULE(
    /// Representation: `ppppmmmm`
    /// - `pppp` are a valid [`PluralElementsKeys`]
    /// - `mmmm` are a valid [`FourBitMetadata`]
    ///
    /// The valid values are determined by their respective types.
    u8,
);
681
682impl From<PluralCategoryAndMetadata> for PluralCategoryAndMetadataPackedULE {
683    fn from(value: PluralCategoryAndMetadata) -> Self {
684        let byte = ((value.plural_category as u8) << 4) | value.metadata.get();
685        debug_assert!(
686            PluralCategoryAndMetadata::try_from_unpacked(Self::unpack_byte(byte)).is_some()
687        );
688        Self(byte)
689    }
690}
691
692// # Safety
693//
694// Safety checklist for `ULE`:
695//
696// 1. The type is a single byte, not padding.
697// 2. The type is a single byte, so it has align(1).
698// 3. `validate_bytes` checks the validity of every byte.
699// 4. `validate_bytes` checks the validity of every byte.
700// 5. All other methods are be left with their default impl.
701// 6. The represented enums implement Eq by byte equality.
702unsafe impl ULE for PluralCategoryAndMetadataPackedULE {
703    fn validate_bytes(bytes: &[u8]) -> Result<(), zerovec::ule::UleError> {
704        bytes
705            .iter()
706            .all(|byte| {
707                let unpacked = Self::unpack_byte(*byte);
708                PluralCategoryAndMetadata::try_from_unpacked(unpacked).is_some()
709            })
710            .then_some(())
711            .ok_or_else(UleError::parse::<Self>)
712    }
713}
714
715impl PluralCategoryAndMetadataPackedULE {
716    fn unpack_byte(byte: u8) -> PluralCategoryAndMetadataUnpacked {
717        let plural_category_byte = (byte & 0xF0) >> 4;
718        let metadata_byte = byte & 0x0F;
719        PluralCategoryAndMetadataUnpacked {
720            plural_category_byte,
721            metadata_byte,
722        }
723    }
724
725    fn get(self) -> PluralCategoryAndMetadata {
726        let unpacked = Self::unpack_byte(self.0);
727        // Safety: by invariant, `self.0` contains valid values for PluralCategoryAndMetadata
728        unsafe { PluralCategoryAndMetadata::try_from_unpacked(unpacked).unwrap_unchecked() }
729    }
730}
731
732impl PluralCategoryAndMetadata {
733    fn try_from_unpacked(unpacked: PluralCategoryAndMetadataUnpacked) -> Option<Self> {
734        let plural_category = PluralElementsKeys::new_from_u8(unpacked.plural_category_byte)?;
735        let metadata = FourBitMetadata::try_from_byte(unpacked.metadata_byte)?;
736        Some(Self {
737            plural_category,
738            metadata,
739        })
740    }
741}
742
743impl AsULE for PluralCategoryAndMetadata {
744    type ULE = PluralCategoryAndMetadataPackedULE;
745    #[inline]
746    fn to_unaligned(self) -> Self::ULE {
747        PluralCategoryAndMetadataPackedULE::from(self)
748    }
749    #[inline]
750    fn from_unaligned(unaligned: Self::ULE) -> Self {
751        unaligned.get()
752    }
753}
754
/// The type of the special patterns list.
///
/// A variable-length slice whose elements pair a packed [`PluralCategoryAndMetadata`] with
/// a value of type `V`.
type PluralElementsTupleSliceVarULE<V> = VarZeroSlice<VarTupleULE<PluralCategoryAndMetadata, V>>;
757
/// The type of the default value.
///
/// A [`FourBitMetadata`] paired with a borrowed value.
type PluralElementWithMetadata<'a, T> = (FourBitMetadata, &'a T);
760
/// Internal intermediate type that can be converted into a [`PluralElementsPackedULE`].
struct PluralElementsPackedBuilder<'a, T> {
    /// The default ("other") value together with its metadata.
    pub default: PluralElementWithMetadata<'a, T>,
    /// The special entries. `to_packed_builder` sets this to `None` instead of an empty list.
    pub specials: Option<Vec<VarTuple<PluralCategoryAndMetadata, &'a T>>>,
}
766
/// Internal unpacked and deserialized values from a [`PluralElementsPackedULE`].
#[derive(Debug)]
struct PluralElementsUnpacked<'a, V: VarULE + ?Sized> {
    /// The default ("other") value with the metadata taken from the lead byte.
    pub default: PluralElementWithMetadata<'a, V>,
    /// The list of special entries, or `None` for a singleton.
    pub specials: Option<&'a PluralElementsTupleSliceVarULE<V>>,
}
773
/// Internal unpacked bytes from a [`PluralElementsPackedULE`].
///
/// The sections are only split apart; their contents have not been validated.
struct PluralElementsUnpackedBytes<'a> {
    /// The first byte, unmodified (singleton/map flag, padding bits and metadata nibble).
    pub lead_byte: u8,
    /// The bytes of the default value `V`.
    pub v_bytes: &'a [u8],
    /// The bytes of the specials slice; `None` when the lead byte marks a singleton.
    pub specials_bytes: Option<&'a [u8]>,
}
780
781/// Helper function to access a value from [`PluralElementsTupleSliceVarULE`]
782fn get_special<V: VarULE + ?Sized>(
783    data: &PluralElementsTupleSliceVarULE<V>,
784    key: PluralElementsKeys,
785) -> Option<(FourBitMetadata, &V)> {
786    data.iter()
787        .filter_map(|ule| {
788            let PluralCategoryAndMetadata {
789                plural_category,
790                metadata,
791            } = ule.sized.get();
792            (plural_category == key).then_some((metadata, &ule.variable))
793        })
794        .next()
795}
796
797impl<T> PluralElementsInner<(FourBitMetadata, T)>
798where
799    T: PartialEq,
800{
801    fn to_packed_builder<'a, V>(&'a self) -> PluralElementsPackedBuilder<'a, T>
802    where
803        &'a T: EncodeAsVarULE<V>,
804        V: VarULE + ?Sized,
805    {
806        let specials = self
807            .get_specials_tuples()
808            .map(|(plural_category, (metadata, t))| VarTuple {
809                sized: PluralCategoryAndMetadata {
810                    plural_category,
811                    metadata: *metadata,
812                },
813                variable: t,
814            })
815            .collect::<Vec<_>>();
816        PluralElementsPackedBuilder {
817            default: (self.other.0, &self.other.1),
818            specials: if specials.is_empty() {
819                None
820            } else {
821                Some(specials)
822            },
823        }
824    }
825}
826
// Encodes plural elements (each value carrying a `FourBitMetadata`) into the packed
// byte representation of `PluralElementsPackedULE<V>`.
//
// Layout written by `encode_var_ule_write`:
//
// - Byte 0 (lead byte): the default value's metadata (`FourBitMetadata::get`), with the
//   `0x80` bit set if and only if specials are present.
// - Without specials: the rest of the buffer is the encoded default value.
// - With specials: byte 1 is the encoded length of the default value (it must fit in a
//   `u8`), followed by the encoded default value, followed by the specials encoded as a
//   `PluralElementsTupleSliceVarULE<V>`.
//
// `encode_var_ule_len` counts exactly these pieces, so the two functions must be kept
// in sync.
unsafe impl<T, V> EncodeAsVarULE<PluralElementsPackedULE<V>>
    for PluralElements<(FourBitMetadata, T)>
where
    T: PartialEq + fmt::Debug,
    for<'a> &'a T: EncodeAsVarULE<V>,
    V: VarULE + ?Sized,
{
    /// Not supported: this impl provides `encode_var_ule_len` and `encode_var_ule_write`
    /// instead, and calling this function panics via `unreachable!()`.
    fn encode_var_ule_as_slices<R>(&self, _cb: impl FnOnce(&[&[u8]]) -> R) -> R {
        // unnecessary if the other two are implemented
        unreachable!()
    }

    /// Returns the number of bytes `encode_var_ule_write` will fill: one lead byte plus
    /// the encoded default value, plus (only when specials exist) one length byte and the
    /// encoded specials slice.
    fn encode_var_ule_len(&self) -> usize {
        let builder = self.0.to_packed_builder();
        1 + builder.default.1.encode_var_ule_len()
            + match builder.specials {
                Some(specials) => {
                    1 + EncodeAsVarULE::<PluralElementsTupleSliceVarULE<V>>::encode_var_ule_len(
                        &specials,
                    )
                }
                None => 0,
            }
    }

    /// Writes the packed representation into `dst`.
    ///
    /// # Panics
    ///
    /// Panics if specials are present and the encoded default value is longer than 255
    /// bytes, or if `dst` is too short for the pieces being written.
    fn encode_var_ule_write(&self, dst: &mut [u8]) {
        let builder = self.0.to_packed_builder();
        #[expect(clippy::unwrap_used)] // by trait invariant
        let (lead_byte, remainder) = dst.split_first_mut().unwrap();
        // Start with the default value's metadata; the high bit is added below if needed.
        *lead_byte = builder.default.0.get();
        if let Some(specials) = builder.specials {
            // Flag that a length byte and a specials section follow.
            *lead_byte |= 0x80;
            #[expect(clippy::unwrap_used)] // by trait invariant
            let (second_byte, remainder) = remainder.split_first_mut().unwrap();
            // The length of the default value is stored in a single byte.
            *second_byte = match u8::try_from(builder.default.1.encode_var_ule_len()) {
                Ok(x) => x,
                // TODO: Inform the user more nicely that their data doesn't fit in our packed structure
                #[expect(clippy::panic)] // for now okay since it is mostly only during datagen
                Err(_) => {
                    panic!("other value too long to be packed: {self:?}")
                }
            };
            // NOTE(review): this unwrap looks like it is also covered by the trait invariant
            // (`dst` sized according to `encode_var_ule_len`) rather than being
            // datagen-specific; confirm.
            #[expect(clippy::unwrap_used)] // for now okay since it is mostly only during datagen
            let (v_bytes, specials_bytes) = remainder
                .split_at_mut_checked(*second_byte as usize)
                .unwrap();
            builder.default.1.encode_var_ule_write(v_bytes);
            EncodeAsVarULE::<PluralElementsTupleSliceVarULE<V>>::encode_var_ule_write(
                &specials,
                specials_bytes,
            );
        } else {
            // No specials: the default value occupies everything after the lead byte.
            builder.default.1.encode_var_ule_write(remainder)
        };
    }
}
883
#[cfg(feature = "datagen")]
impl<'a, V> PluralElementsInner<(FourBitMetadata, &'a V)>
where
    V: VarULE + ?Sized,
{
    /// Expands packed plural elements into one optional entry per category.
    ///
    /// `other` is taken from the packed default value. Every remaining field is looked up
    /// in the packed specials (if any) and is `None` when the specials are absent or have
    /// no entry for that key.
    fn from_packed(packed: &'a PluralElementsPackedULE<V>) -> Self {
        let parts = packed.as_parts();
        let specials = parts.specials;
        // Shared lookup: `None` without specials, otherwise the first entry matching `key`.
        let lookup =
            |key: PluralElementsKeys| specials.and_then(|entries| get_special(entries, key));
        Self {
            other: parts.default,
            zero: lookup(PluralElementsKeys::Zero),
            one: lookup(PluralElementsKeys::One),
            two: lookup(PluralElementsKeys::Two),
            few: lookup(PluralElementsKeys::Few),
            many: lookup(PluralElementsKeys::Many),
            explicit_zero: lookup(PluralElementsKeys::ExplicitZero),
            explicit_one: lookup(PluralElementsKeys::ExplicitOne),
        }
    }
}
917
#[cfg(feature = "serde")]
impl<T> PluralElementsInner<(FourBitMetadata, T)> {
    /// Consumes the elements and encodes them into a boxed [`PluralElementsPackedULE`].
    ///
    /// # Panics
    ///
    /// Encoding panics if non-default entries are present and the encoded default value
    /// does not fit the single length byte of the packed format.
    fn into_packed<V>(self) -> Box<PluralElementsPackedULE<V>>
    where
        T: PartialEq + fmt::Debug,
        for<'a> &'a T: EncodeAsVarULE<V>,
        V: VarULE + ?Sized,
    {
        // The encoder is implemented on the `PluralElements` wrapper, not on the inner type.
        let wrapped = PluralElements(self);
        zerovec::ule::encode_varule_to_box(&wrapped)
    }
}
929
#[cfg(feature = "serde")]
impl<'de, 'data, V> serde::Deserialize<'de> for &'data PluralElementsPackedULE<V>
where
    'de: 'data,
    V: VarULE + ?Sized,
{
    /// Borrows a packed value directly from the deserializer's byte input.
    ///
    /// Human-readable formats are rejected with a custom error. For other formats the
    /// input is read as a borrowed byte slice and validated with `parse_bytes`.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            return Err(serde::de::Error::custom(
                "&PluralElementsPackedULE cannot be deserialized from human-readable formats",
            ));
        }
        let raw = <&[u8]>::deserialize(deserializer)?;
        PluralElementsPackedULE::<V>::parse_bytes(raw).map_err(serde::de::Error::custom)
    }
}
950
#[cfg(feature = "serde")]
impl<'de, V> serde::Deserialize<'de> for Box<PluralElementsPackedULE<V>>
where
    V: VarULE + ?Sized,
    Box<V>: serde::Deserialize<'de> + PartialEq + fmt::Debug,
{
    /// Deserializes an owned packed value.
    ///
    /// Human-readable formats are read as a `PluralElementsInner` of boxed values and then
    /// packed. Other formats are read as a borrowed byte slice, validated with
    /// `parse_bytes`, and copied into a new box.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if deserializer.is_human_readable() {
            let unpacked: PluralElementsInner<(FourBitMetadata, Box<V>)> =
                PluralElementsInner::deserialize(deserializer)?;
            return Ok(unpacked.into_packed());
        }
        let raw = <&[u8]>::deserialize(deserializer)?;
        match PluralElementsPackedULE::<V>::parse_bytes(raw) {
            Ok(ule) => Ok(ule.to_owned()),
            Err(e) => Err(serde::de::Error::custom(e)),
        }
    }
}
973
#[cfg(feature = "datagen")]
impl<V> serde::Serialize for PluralElementsPackedULE<V>
where
    V: PartialEq + serde::Serialize + VarULE + ?Sized,
{
    /// Serializes the packed value.
    ///
    /// Non-human-readable formats receive the packed bytes verbatim. Human-readable
    /// formats receive the unpacked `PluralElementsInner` view built by `from_packed`.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(self.as_bytes());
        }
        PluralElementsInner::<(FourBitMetadata, &V)>::from_packed(self).serialize(serializer)
    }
}
992
// Bakes a borrowed `PluralElementsPackedULE` as an expression that rebuilds it from its
// raw bytes.
#[cfg(feature = "datagen")]
impl<'a, V> databake::Bake for &'a PluralElementsPackedULE<V>
where
    &'a V: databake::Bake,
    V: VarULE + ?Sized,
{
    /// Emits a call to `PluralElementsPackedULE::from_bytes_unchecked` on the baked form
    /// of `self.bytes`.
    fn bake(&self, ctx: &databake::CrateEnv) -> databake::TokenStream {
        // The emitted path names the `icu_plurals` crate, so record it in the environment.
        ctx.insert("icu_plurals");
        let bytes = (&self.bytes).bake(ctx);
        databake::quote! {
            // Safety: the bytes came directly from self.bytes on the previous line.
            unsafe { icu_plurals::provider::PluralElementsPackedULE::from_bytes_unchecked(#bytes) }
        }
    }
}
1008
// Size accounting for baked `&PluralElementsPackedULE` values.
#[cfg(feature = "datagen")]
impl<'a, V> databake::BakeSize for &'a PluralElementsPackedULE<V>
where
    &'a V: databake::Bake,
    V: VarULE + ?Sized,
{
    /// Reports the length in bytes of the packed representation (`self.bytes`).
    fn borrows_size(&self) -> usize {
        self.bytes.len()
    }
}
1019
/// Helper function to properly deserialize a `Cow<PluralElementsPackedULE<V>>`
///
/// Human-readable formats produce an owned (`Cow::Owned`) value; all other formats
/// borrow from the input and produce `Cow::Borrowed`.
///
/// Due to <https://github.com/rust-lang/rust/issues/130180>, you may need to qualify
/// `V` when invoking this, like so:
///
/// `#[serde(deserialize_with = "deserialize_plural_elements_packed_cow::<_, str>")]`
///
/// See <https://github.com/unicode-org/icu4x/pull/1556>
#[cfg(feature = "serde")]
fn deserialize_plural_elements_packed_cow<'de, 'data, D, V>(
    deserializer: D,
) -> Result<Cow<'data, PluralElementsPackedULE<V>>, D::Error>
where
    'de: 'data,
    D: serde::Deserializer<'de>,
    V: VarULE + ?Sized,
    Box<PluralElementsPackedULE<V>>: serde::Deserialize<'de>,
{
    use serde::Deserialize;
    if !deserializer.is_human_readable() {
        // Binary formats: borrow the packed bytes straight from the input.
        let borrowed = <&'de PluralElementsPackedULE<V>>::deserialize(deserializer)?;
        return Ok(Cow::Borrowed(borrowed));
    }
    // Human-readable formats: the value has to be built, so it is owned.
    let owned = Box::<PluralElementsPackedULE<V>>::deserialize(deserializer)?;
    Ok(Cow::Owned(owned))
}
1047
1048// Need a manual impl because the derive(Clone) impl bounds are wrong
1049impl<V> Clone for PluralElementsPackedCow<'_, V>
1050where
1051    V: VarULE + ?Sized,
1052{
1053    fn clone(&self) -> Self {
1054        Self {
1055            elements: self.elements.clone(),
1056        }
1057    }
1058}
1059
1060impl<T, V> From<PluralElements<T>> for PluralElementsPackedCow<'static, V>
1061where
1062    V: VarULE + ?Sized,
1063    T: PartialEq + fmt::Debug,
1064    for<'a> &'a T: EncodeAsVarULE<V>,
1065{
1066    fn from(value: PluralElements<T>) -> Self {
1067        let elements =
1068            zerovec::ule::encode_varule_to_box(&value.map(|s| (FourBitMetadata::zero(), s)));
1069        Self {
1070            elements: Cow::Owned(elements),
1071        }
1072    }
1073}
1074
1075impl<V> PluralElementsPackedCow<'_, V>
1076where
1077    V: VarULE + ?Sized,
1078{
1079    /// Returns the value for the given [`PluralOperands`] and [`PluralRules`].
1080    pub fn get<'a>(&'a self, op: PluralOperands, rules: &PluralRules) -> &'a V {
1081        self.elements.get(op, rules).1
1082    }
1083}
1084
/// Round-trips plural elements that contain only a default value ("abc") through
/// postcard (owned, borrowed, and `PluralElementsPackedCow`) and through JSON, and pins
/// the exact postcard bytes.
#[test]
fn test_serde_singleton_roundtrip() {
    let plural_elements = PluralElements::new((FourBitMetadata::zero(), "abc"));
    let ule = zerovec::ule::encode_varule_to_box(&plural_elements);

    let postcard_bytes = postcard::to_allocvec(&ule).unwrap();
    assert_eq!(
        postcard_bytes,
        &[
            4,    // Postcard header: number of payload bytes that follow
            0x00, // Lead byte: zero metadata, high bit clear (no specials)
            b'a', b'b', b'c', // Default string, filling the rest of the payload
        ]
    );

    // Owned deserialization copies the bytes into a new box.
    let postcard_ule: Box<PluralElementsPackedULE<str>> =
        postcard::from_bytes(&postcard_bytes).unwrap();
    assert_eq!(ule, postcard_ule);

    // Borrowed deserialization points into `postcard_bytes`.
    let postcard_borrowed: &PluralElementsPackedULE<str> =
        postcard::from_bytes(&postcard_bytes).unwrap();
    assert_eq!(&*ule, postcard_borrowed);

    // The cow wrapper must borrow (not allocate) when reading postcard.
    let postcard_cow: PluralElementsPackedCow<str> = postcard::from_bytes(&postcard_bytes).unwrap();
    assert_eq!(&*ule, &*postcard_cow.elements);
    assert!(matches!(postcard_cow.elements, Cow::Borrowed(_)));

    // JSON is human-readable, so it goes through the unpacked representation.
    let json_str = serde_json::to_string(&ule).unwrap();
    let json_ule: Box<PluralElementsPackedULE<str>> = serde_json::from_str(&json_str).unwrap();
    assert_eq!(ule, json_ule);
}
1116
/// Round-trips plural elements with a default value ("abc") and a `one` value ("defg")
/// through postcard (owned and borrowed) and through JSON, and pins the exact postcard
/// bytes.
#[test]
fn test_serde_nonsingleton_roundtrip() {
    let plural_elements = PluralElements::new((FourBitMetadata::zero(), "abc"))
        .with_one_value(Some((FourBitMetadata::zero(), "defg")));
    let ule = zerovec::ule::encode_varule_to_box(&plural_elements);

    let postcard_bytes = postcard::to_allocvec(&ule).unwrap();
    assert_eq!(
        postcard_bytes,
        &[
            12,   // Postcard header: number of payload bytes that follow
            0x80, // Lead byte: zero metadata, high bit set (specials follow)
            3, b'a', b'b', b'c', // Length byte, then the default string of length 3
            1, 0, // VarZeroVec of length 1
            0x10, b'd', b'e', b'f', b'g' // Plural category 1 and string "defg"
        ]
    );

    // Owned deserialization copies the bytes into a new box.
    let postcard_ule: Box<PluralElementsPackedULE<str>> =
        postcard::from_bytes(&postcard_bytes).unwrap();
    assert_eq!(ule, postcard_ule);

    // Borrowed deserialization points into `postcard_bytes`.
    let postcard_borrowed: &PluralElementsPackedULE<str> =
        postcard::from_bytes(&postcard_bytes).unwrap();
    assert_eq!(&*ule, postcard_borrowed);

    // JSON is human-readable, so it goes through the unpacked representation.
    let json_str = serde_json::to_string(&ule).unwrap();
    let json_ule: Box<PluralElementsPackedULE<str>> = serde_json::from_str(&json_str).unwrap();
    assert_eq!(ule, json_ule);
}