icu_datetime/provider/fields/
length.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use core::cmp::{Ord, PartialOrd};
6use core::fmt;
7use displaydoc::Display;
8use zerovec::ule::{AsULE, UleError, ULE};
9
10/// An error relating to the length of a field within a date pattern.
11#[derive(Display, Debug, PartialEq, Copy, Clone)]
12#[non_exhaustive]
13pub enum LengthError {
14    /// The length of the field string within the pattern is invalid, according to
15    /// the field type and its supported field patterns in LDML. See [`FieldLength`].
16    #[displaydoc("Invalid length")]
17    InvalidLength,
18}
19
20impl core::error::Error for LengthError {}
21
22/// An enum representing the length of a field within a date or time formatting pattern string.
23///
24/// Such strings represent fields as a letter occurring 1 or more times in a row, ex:
25/// `MMM`, `dd`, `y`.  See the
26/// [LDML documentation in UTS 35](https://unicode.org/reports/tr35/tr35-dates.html#Date_Format_Patterns)
27/// for more details.
28#[derive(Debug, Eq, PartialEq, Clone, Copy, Ord, PartialOrd)]
29#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
30#[cfg_attr(feature = "datagen", databake(path = icu_datetime::fields))]
31#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
32#[allow(clippy::exhaustive_enums)] // part of data struct
33pub enum FieldLength {
34    /// Numeric: minimum digits
35    ///
36    /// Text: same as [`Self::Three`]
37    One,
38    /// Numeric: pad to 2 digits
39    ///
40    /// Text: same as [`Self::Three`]
41    Two,
42    /// Numeric: pad to 3 digits
43    ///
44    /// Text: Abbreviated format.
45    Three,
46    /// Numeric: pad to 4 digits
47    ///
48    /// Text: Wide format.
49    Four,
50    /// Numeric: pad to 5 digits
51    ///
52    /// Text: Narrow format.
53    Five,
54    /// Numeric: pad to 6 digits
55    ///
56    /// Text: Short format.
57    Six,
58    /// FieldLength::One (numeric), but overridden with a different numbering system
59    NumericOverride(FieldNumericOverrides),
60}
61
62/// First index used for numeric overrides in compact FieldLength representation
63///
64/// Currently 17 due to decision in <https://unicode-org.atlassian.net/browse/CLDR-17217>,
65/// may become 16 if the `> 16` is updated to a ` >= 16`
66const FIRST_NUMERIC_OVERRIDE: u8 = 17;
67/// Last index used for numeric overrides
68const LAST_NUMERIC_OVERRIDE: u8 = 31;
69
70impl FieldLength {
71    #[inline]
72    pub(crate) fn idx(self) -> u8 {
73        match self {
74            FieldLength::One => 1,
75            FieldLength::Two => 2,
76            FieldLength::Three => 3,
77            FieldLength::Four => 4,
78            FieldLength::Five => 5,
79            FieldLength::Six => 6,
80            FieldLength::NumericOverride(o) => FIRST_NUMERIC_OVERRIDE
81                .saturating_add(o as u8)
82                .min(LAST_NUMERIC_OVERRIDE),
83        }
84    }
85
86    #[inline]
87    pub(crate) fn from_idx(idx: u8) -> Result<Self, LengthError> {
88        Ok(match idx {
89            1 => Self::One,
90            2 => Self::Two,
91            3 => Self::Three,
92            4 => Self::Four,
93            5 => Self::Five,
94            6 => Self::Six,
95            idx if (FIRST_NUMERIC_OVERRIDE..=LAST_NUMERIC_OVERRIDE).contains(&idx) => {
96                Self::NumericOverride((idx - FIRST_NUMERIC_OVERRIDE).try_into()?)
97            }
98            _ => return Err(LengthError::InvalidLength),
99        })
100    }
101
102    #[inline]
103    pub(crate) fn to_len(self) -> usize {
104        match self {
105            FieldLength::One => 1,
106            FieldLength::Two => 2,
107            FieldLength::Three => 3,
108            FieldLength::Four => 4,
109            FieldLength::Five => 5,
110            FieldLength::Six => 6,
111            FieldLength::NumericOverride(o) => FIRST_NUMERIC_OVERRIDE as usize + o as usize,
112        }
113    }
114
115    /// UTS 35 defines several 1 and 2 symbols to be the same as 3 symbols (abbreviated).
116    /// For example, 'a' represents an abbreviated day period, the same as 'aaa'.
117    ///
118    /// This function maps field lengths 1 and 2 to field length 3.
119    pub(crate) fn numeric_to_abbr(self) -> Self {
120        match self {
121            FieldLength::One | FieldLength::Two => FieldLength::Three,
122            other => other,
123        }
124    }
125}
126
/// Unaligned, single-byte representation of a [`FieldLength`].
///
/// The wrapped byte is the compact index produced by `FieldLength::idx`.
#[repr(transparent)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct FieldLengthULE(u8);
130
131impl AsULE for FieldLength {
132    type ULE = FieldLengthULE;
133    fn to_unaligned(self) -> Self::ULE {
134        FieldLengthULE(self.idx())
135    }
136    fn from_unaligned(unaligned: Self::ULE) -> Self {
137        #[allow(clippy::unwrap_used)] // OK because the ULE is pre-validated
138        Self::from_idx(unaligned.0).unwrap()
139    }
140}
141
142impl FieldLengthULE {
143    #[inline]
144    pub(crate) fn validate_byte(byte: u8) -> Result<(), UleError> {
145        FieldLength::from_idx(byte)
146            .map(|_| ())
147            .map_err(|_| UleError::parse::<FieldLength>())
148    }
149}
150
151// Safety checklist for ULE:
152//
153// 1. Must not include any uninitialized or padding bytes (true since transparent over a ULE).
154// 2. Must have an alignment of 1 byte (true since transparent over a ULE).
155// 3. ULE::validate_bytes() checks that the given byte slice represents a valid slice.
156// 4. ULE::validate_bytes() checks that the given byte slice has a valid length
157//    (true since transparent over a type of size 1).
158// 5. All other methods must be left with their default impl.
159// 6. Byte equality is semantic equality.
160unsafe impl ULE for FieldLengthULE {
161    fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
162        for byte in bytes {
163            Self::validate_byte(*byte)?;
164        }
165        Ok(())
166    }
167}
168
/// The numeric overrides for datetime patterns, as found in CLDR.
///
/// Each variant's doc comment gives the string code returned for it by
/// `as_str`.
#[derive(Clone, Copy, Debug, Eq, Ord, PartialEq, PartialOrd)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_datetime::fields))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum FieldNumericOverrides {
    /// Code `hanidec`.
    Hanidec = 0,
    /// Code `hanidays`.
    Hanidays = 1,
    /// Code `hebr`.
    Hebr = 2,
    /// Code `romanlow`.
    Romanlow = 3,
    /// Code `jpnyear`.
    Jpnyear = 4,
}
188
189impl TryFrom<u8> for FieldNumericOverrides {
190    type Error = LengthError;
191    fn try_from(other: u8) -> Result<Self, LengthError> {
192        Ok(match other {
193            0 => Self::Hanidec,
194            1 => Self::Hanidays,
195            2 => Self::Hebr,
196            3 => Self::Romanlow,
197            4 => Self::Jpnyear,
198            _ => return Err(LengthError::InvalidLength),
199        })
200    }
201}
202
203impl FieldNumericOverrides {
204    /// Convert this to the corresponding string code
205    pub fn as_str(self) -> &'static str {
206        match self {
207            Self::Hanidec => "hanidec",
208            Self::Hanidays => "hanidays",
209            Self::Hebr => "hebr",
210            Self::Romanlow => "romanlow",
211            Self::Jpnyear => "jpnyear",
212        }
213    }
214}
215
216impl fmt::Display for FieldNumericOverrides {
217    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
218        self.as_str().fmt(f)
219    }
220}