icu_calendar/provider/
chinese_based.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! 🚧 \[Unstable\] Data provider struct definitions for chinese-based calendars.
6//!
7//! <div class="stab unstable">
8//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
9//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
10//! to be stable, their Rust representation might not be. Use with caution.
11//! </div>
12//!
13//! Read more about data providers: [`icu_provider`]
14
15use icu_provider::prelude::*;
16use zerovec::ule::{AsULE, ULE};
17use zerovec::ZeroVec;
18
// Declares the `CalendarChineseV1` data marker: it names the "calendar/chinese/v1"
// path and uses `ChineseBasedCache<'static>` as its payload type.
// NOTE(review): `is_singleton = true` presumably means the payload is not keyed by
// locale or other attributes — confirm against the `icu_provider` documentation.
icu_provider::data_marker!(
    /// Precomputed data for the Chinese calendar
    CalendarChineseV1,
    "calendar/chinese/v1",
    ChineseBasedCache<'static>,
    is_singleton = true
);
// Declares the `CalendarDangiV1` data marker: it names the "calendar/dangi/v1"
// path and uses `ChineseBasedCache<'static>` as its payload type (the same payload
// struct as the Chinese calendar marker, under a different path).
// NOTE(review): `is_singleton = true` presumably means the payload is not keyed by
// locale or other attributes — confirm against the `icu_provider` documentation.
icu_provider::data_marker!(
    /// Precomputed data for the Dangi calendar
    CalendarDangiV1,
    "calendar/dangi/v1",
    ChineseBasedCache<'static>,
    is_singleton = true
);
33
/// Cached/precompiled data for a certain range of years for a Chinese-based
/// calendar. Avoids the need to perform lunar calendar arithmetic for most calendrical
/// operations.
///
/// The cache is a flat list of per-year records ([`PackedChineseBasedYearInfo`]) plus
/// the related ISO year that the first record belongs to.
#[derive(Debug, PartialEq, Clone, Default, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider::chinese_based))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct ChineseBasedCache<'data> {
    /// The related ISO year of the first entry in `data`
    pub first_related_iso_year: i32,
    /// A list of precomputed data for each year, beginning with `first_related_iso_year`
    // `serde(borrow)` lets deserialization borrow the entries from the input buffer
    // instead of copying them.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub data: ZeroVec<'data, PackedChineseBasedYearInfo>,
}
48
// Registers `ChineseBasedCache` with `icu_provider` as a data struct.
// NOTE(review): the trailing `#[cfg(feature = "datagen")]` is passed to the macro as an
// argument; presumably it gates the datagen-only items the macro generates — confirm
// against the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
    ChineseBasedCache<'_>,
    #[cfg(feature = "datagen")]
);
53
/// The struct containing compiled ChineseData
///
/// Bit structure (little endian: note that shifts go in the opposite direction!)
///
/// ```text
/// Bit:             0   1   2   3   4   5   6   7
/// Byte 0:          [  month lengths .............
/// Byte 1:         .. month lengths ] | [ leap month index ..
/// Byte 2:          ] | [   NY offset       ] | unused
/// ```
///
/// Viewed as one 24-bit little-endian integer: bits 0–12 hold the month lengths,
/// bits 13–16 the leap month index (0 meaning "no leap month"), bits 17–22 the
/// New Year offset, and bit 23 is unused.
///
/// Where the New Year Offset is the offset from ISO Jan 19 of that year for Chinese New Year
/// (Jan 19 being the earliest New Year this format allows, i.e. 18 days after ISO Jan 1),
/// the month lengths are stored as 1 = 30, 0 = 29 for each month including the leap month.
/// The largest possible offset is 33, which requires 6 bits of storage.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, ULE)]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_calendar::provider))]
#[repr(C, packed)]
pub struct PackedChineseBasedYearInfo(pub u8, pub u8, pub u8);
79
80impl PackedChineseBasedYearInfo {
81    /// The first day of the ISO year on which Chinese New Year may occur
82    ///
83    /// According to Reingold & Dershowitz, ch 19.6, Chinese New Year occurs on Jan 21 - Feb 21 inclusive.
84    ///
85    /// Chinese New Year in the year 30 AD is January 20 (30-01-20).
86    ///
87    /// We allow it to occur as early as January 19 which is the earliest the second new moon
88    /// could occur after the Winter Solstice if the solstice is pinned to December 20.
89    const FIRST_NY: i64 = 18;
90
91    pub(crate) fn new(
92        month_lengths: [bool; 13],
93        leap_month_idx: Option<u8>,
94        ny_offset: i64,
95    ) -> Self {
96        debug_assert!(
97            !month_lengths[12] || leap_month_idx.is_some(),
98            "Last month length should not be set for non-leap years"
99        );
100        let ny_offset = ny_offset - Self::FIRST_NY;
101        debug_assert!(ny_offset >= 0, "Year offset too small to store");
102        debug_assert!(ny_offset < 34, "Year offset too big to store");
103        debug_assert!(
104            leap_month_idx.map(|l| l <= 13).unwrap_or(true),
105            "Leap month indices must be 1 <= i <= 13"
106        );
107        let mut all = 0u32; // last byte unused
108
109        for (month, length_30) in month_lengths.iter().enumerate() {
110            #[allow(clippy::indexing_slicing)]
111            if *length_30 {
112                all |= 1 << month as u32;
113            }
114        }
115        let leap_month_idx = leap_month_idx.unwrap_or(0);
116        all |= (leap_month_idx as u32) << (8 + 5);
117        all |= (ny_offset as u32) << (16 + 1);
118        let le = all.to_le_bytes();
119        Self(le[0], le[1], le[2])
120    }
121
122    // Get the new year difference from the ISO new year
123    pub(crate) fn ny_offset(self) -> u8 {
124        Self::FIRST_NY as u8 + (self.2 >> 1)
125    }
126
127    pub(crate) fn leap_month(self) -> Option<u8> {
128        let bits = (self.1 >> 5) + ((self.2 & 0b1) << 3);
129
130        (bits != 0).then_some(bits)
131    }
132
133    // Whether a particular month has 30 days (month is 1-indexed)
134    pub(crate) fn month_has_30_days(self, month: u8) -> bool {
135        let months = u16::from_le_bytes([self.0, self.1]);
136        months & (1 << (month - 1) as u16) != 0
137    }
138
139    #[cfg(any(test, feature = "datagen"))]
140    pub(crate) fn month_lengths(self) -> [bool; 13] {
141        core::array::from_fn(|i| self.month_has_30_days(i as u8 + 1))
142    }
143
144    // Which day of year is the last day of a month (month is 1-indexed)
145    pub(crate) fn last_day_of_month(self, month: u8) -> u16 {
146        let months = u16::from_le_bytes([self.0, self.1]);
147        // month is 1-indexed, so `29 * month` includes the current month
148        let mut prev_month_lengths = 29 * month as u16;
149        // month is 1-indexed, so `1 << month` is a mask with all zeroes except
150        // for a 1 at the bit index at the next month. Subtracting 1 from it gets us
151        // a bitmask for all months up to now
152        let long_month_bits = months & ((1 << month as u16) - 1);
153        prev_month_lengths += long_month_bits.count_ones().try_into().unwrap_or(0);
154        prev_month_lengths
155    }
156}
157
158impl AsULE for PackedChineseBasedYearInfo {
159    type ULE = Self;
160    fn to_unaligned(self) -> Self {
161        self
162    }
163    fn from_unaligned(other: Self) -> Self {
164        other
165    }
166}
167
#[cfg(feature = "serde")]
mod serialization {
    use super::*;

    #[cfg(feature = "datagen")]
    use serde::{ser, Serialize};
    use serde::{de, Deserialize, Deserializer};

    /// Field-by-field form of [`PackedChineseBasedYearInfo`] used with human-readable
    /// formats; non-human-readable formats use the three raw bytes instead.
    #[derive(Deserialize)]
    #[cfg_attr(feature = "datagen", derive(Serialize))]
    struct SerdePackedChineseBasedYearInfo {
        ny_offset: u8,
        month_has_30_days: [bool; 13],
        leap_month_idx: Option<u8>,
    }

    impl SerdePackedChineseBasedYearInfo {
        /// Checks the requirements that `PackedChineseBasedYearInfo::new` only enforces
        /// through debug assertions, so that malformed input is rejected with an error
        /// instead of panicking (debug builds) or being packed into the wrong bits
        /// (release builds).
        fn validate(&self) -> Result<(), &'static str> {
            let stored_offset = i64::from(self.ny_offset) - PackedChineseBasedYearInfo::FIRST_NY;
            if !(0..34).contains(&stored_offset) {
                return Err("ny_offset is outside the storable range");
            }
            if !self
                .leap_month_idx
                .map_or(true, |idx| (1..=13).contains(&idx))
            {
                return Err("leap_month_idx must be between 1 and 13");
            }
            let [.., last_month_is_long] = self.month_has_30_days;
            if last_month_is_long && self.leap_month_idx.is_none() {
                return Err("the 13th month length is set but there is no leap month");
            }
            Ok(())
        }
    }

    impl<'de> Deserialize<'de> for PackedChineseBasedYearInfo {
        /// Reads the field-by-field form from human-readable formats (validating it
        /// before packing) and the three raw bytes from all other formats.
        fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
        where
            D: Deserializer<'de>,
        {
            if deserializer.is_human_readable() {
                let readable = SerdePackedChineseBasedYearInfo::deserialize(deserializer)?;
                readable
                    .validate()
                    .map_err(<D::Error as de::Error>::custom)?;
                Ok(readable.into())
            } else {
                let data = <(u8, u8, u8)>::deserialize(deserializer)?;
                Ok(PackedChineseBasedYearInfo(data.0, data.1, data.2))
            }
        }
    }

    #[cfg(feature = "datagen")]
    impl Serialize for PackedChineseBasedYearInfo {
        /// Writes the field-by-field form to human-readable formats and the three raw
        /// bytes to all other formats.
        fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
        where
            S: ser::Serializer,
        {
            if serializer.is_human_readable() {
                SerdePackedChineseBasedYearInfo::from(*self).serialize(serializer)
            } else {
                (self.0, self.1, self.2).serialize(serializer)
            }
        }
    }

    #[cfg(feature = "datagen")]
    impl From<PackedChineseBasedYearInfo> for SerdePackedChineseBasedYearInfo {
        /// Unpacks every field through the accessors of the packed struct.
        fn from(other: PackedChineseBasedYearInfo) -> Self {
            Self {
                ny_offset: other.ny_offset(),
                month_has_30_days: other.month_lengths(),
                leap_month_idx: other.leap_month(),
            }
        }
    }

    impl From<SerdePackedChineseBasedYearInfo> for PackedChineseBasedYearInfo {
        /// Packs the fields with `PackedChineseBasedYearInfo::new`; the deserializer
        /// validates the fields before converting.
        fn from(other: SerdePackedChineseBasedYearInfo) -> Self {
            Self::new(
                other.month_has_30_days,
                other.leap_month_idx,
                other.ny_offset as i64,
            )
        }
    }
}
233
#[cfg(test)]
mod test {
    use super::*;

    /// Packs the given fields and checks that each accessor hands back what went in.
    fn packed_roundtrip_single(
        mut month_lengths: [bool; 13],
        leap_month_idx: Option<u8>,
        ny_offset: i64,
    ) {
        // Avoid bad invariants: the 13th month may only be long when there is a leap month
        month_lengths[12] &= leap_month_idx.is_some();

        let packed = PackedChineseBasedYearInfo::new(month_lengths, leap_month_idx, ny_offset);

        let unpacked_offset = packed.ny_offset() as i64;
        assert_eq!(
            ny_offset, unpacked_offset,
            "Roundtrip with {month_lengths:?}, {leap_month_idx:?}, {ny_offset}"
        );

        let unpacked_leap = packed.leap_month();
        assert_eq!(
            leap_month_idx, unpacked_leap,
            "Roundtrip with {month_lengths:?}, {leap_month_idx:?}, {ny_offset}"
        );

        let unpacked_lengths = packed.month_lengths();
        assert_eq!(
            month_lengths, unpacked_lengths,
            "Roundtrip with {month_lengths:?}, {leap_month_idx:?}, {ny_offset}"
        );
    }

    /// Runs the roundtrip check over a fixed table of month patterns, leap months
    /// and New Year offsets.
    #[test]
    fn test_roundtrip_packed() {
        const SHORT: [bool; 13] = [false; 13];
        const LONG: [bool; 13] = [true; 13];
        const ALTERNATING1: [bool; 13] = [
            false, true, false, true, false, true, false, true, false, true, false, true, false,
        ];
        const ALTERNATING2: [bool; 13] = [
            true, false, true, false, true, false, true, false, true, false, true, false, true,
        ];
        const RANDOM1: [bool; 13] = [
            true, true, false, false, true, true, false, true, true, true, true, false, true,
        ];
        const RANDOM2: [bool; 13] = [
            false, true, true, true, true, false, true, true, true, false, false, true, false,
        ];

        // (month lengths, leap month, days past the earliest New Year offset of 18)
        const CASES: &[([bool; 13], Option<u8>, i64)] = &[
            (SHORT, None, 5),
            (SHORT, None, 10),
            (SHORT, Some(11), 15),
            (LONG, Some(12), 15),
            (ALTERNATING1, None, 2),
            (ALTERNATING1, Some(3), 5),
            (ALTERNATING2, None, 9),
            (ALTERNATING2, Some(7), 26),
            (RANDOM1, None, 29),
            (RANDOM1, Some(12), 29),
            (RANDOM1, Some(2), 21),
            (RANDOM2, None, 25),
            (RANDOM2, Some(2), 19),
            (RANDOM2, Some(5), 2),
            (RANDOM2, Some(12), 5),
        ];

        for &(lengths, leap, days_past_first) in CASES {
            packed_roundtrip_single(lengths, leap, 18 + days_past_first);
        }
    }
}