icu_time/provider/
iana.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! 🚧 \[Unstable\] IANA time zone identifier data for this component
6//!
7//! <div class="stab unstable">
8//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
9//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
10//! to be stable, their Rust representation might not be. Use with caution.
11//! </div>
12//!
13//! Read more about data providers: [`icu_provider`]
14
15use crate::TimeZone;
16use icu_provider::prelude::*;
17use zerotrie::ZeroAsciiIgnoreCaseTrie;
18use zerovec::{VarZeroVec, ZeroVec};
19
20/// [`IanaToBcp47Map`]'s trie cannot handle differently-cased prefixes, like `Mexico/BajaSur` and `MET`.
21///
22/// Therefore, any ID that is not of the shape `{region}/{city}` gets prefixed with this byte
23/// inside the trie.
24///
25/// During lookup, if the input is not of the shape `{region}/{city}`, the trie cursor has to be advanced over
26/// this byte.
27pub const NON_REGION_CITY_PREFIX: u8 = b'_';
28
29icu_provider::data_marker!(
30    /// Data marker for [`IanaToBcp47Map`], the mapping from IANA to BCP-47 time zone identifiers.
31    ///
32    /// This marker uses a checksum to ensure consistency with [`TimezoneIdentifiersIanaExtendedV1`].
33    TimezoneIdentifiersIanaCoreV1,
34    "timezone/identifiers/iana/core/v1",
35    IanaToBcp47Map<'static>,
36    is_singleton = true,
37    has_checksum = true,
38);
39
40icu_provider::data_marker!(
41    /// Data marker for [`IanaNames`], the list of normalized IANA time zone identifiers.
42    ///
43    /// This marker uses a checksum to ensure consistency with [`TimezoneIdentifiersIanaCoreV1`].
44    TimezoneIdentifiersIanaExtendedV1,
45    "timezone/identifiers/iana/extended/v1",
46    IanaNames<'static>,
47    is_singleton = true,
48    has_checksum = true,
49);
50
51/// A mapping from normal-case IANA time zone identifiers to BCP-47 time zone identifiers.
52///
53/// Multiple IANA time zone IDs can map to the same BCP-47 time zone ID.
54///
55/// <div class="stab unstable">
56/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
57/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
58/// to be stable, their Rust representation might not be. Use with caution.
59/// </div>
60#[derive(Debug, Clone, PartialEq, zerofrom::ZeroFrom, yoke::Yokeable)]
61#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
62#[cfg_attr(feature = "datagen", databake(path = icu_time::provider::iana))]
63#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
64pub struct IanaToBcp47Map<'data> {
65    /// An ASCII-case-insensitive trie from normal-case IANA time zone identifiers to indexes of BCP-47
66    /// time zone identifiers along with a canonical flag. IDs not shaped `{region}/{city}` carry [`NON_REGION_CITY_PREFIX`].
67    ///
68    /// The `usize` values stored in the trie have the following form:
69    ///
70    /// - Lowest bit: 1 if canonical, 0 if not canonical
71    /// - All remaining bits: index into `bcp47_ids`
72    ///
73    /// For example, in CLDR 44, `"Africa/Abidjan"` has value 221, which means it is canonical
74    /// (low bit is 1 == odd number) and the index into `bcp47_ids` is 110 (221 >> 1).
75    #[cfg_attr(feature = "serde", serde(borrow))]
76    pub map: ZeroAsciiIgnoreCaseTrie<ZeroVec<'data, u8>>,
77    /// A list of BCP-47 time zone identifiers, sorted by canonical IANA ID.
78    #[cfg_attr(feature = "serde", serde(borrow))]
79    // Size note: this is 9739B as `ZeroVec<TimeZone>` (`ZeroVec<TinyStr8>`)
80    // and 9335B as `VarZeroVec<str>`
81    pub bcp47_ids: ZeroVec<'data, TimeZone>,
82}
83
84icu_provider::data_struct!(
85    IanaToBcp47Map<'_>,
86    #[cfg(feature = "datagen")]
87);
88
89/// A list of all IANA time zone identifiers, which also maps BCP-47 time zone identifiers to IANA ones.
90///
91/// Each BCP-47 time zone ID maps to the default IANA time zone ID according to the CLDR data.
92///
93/// <div class="stab unstable">
94/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
95/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
96/// to be stable, their Rust representation might not be. Use with caution.
97/// </div>
98#[derive(Debug, Clone, PartialEq, zerofrom::ZeroFrom, yoke::Yokeable)]
99#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
100#[cfg_attr(feature = "datagen", databake(path = icu_time::provider::iana))]
101#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
102#[yoke(prove_covariance_manually)]
103pub struct IanaNames<'data> {
104    /// The list of all normalized IANA identifiers.
105    ///
106    /// The first `bcp47_ids.len()` identifiers are canonical for
107    /// the BCP-47 IDs in [`IanaToBcp47Map::bcp47_ids`] at the same index.
108    ///
109    /// The remaining non-canonical identifiers are sorted in ascending lowercase order.
110    #[cfg_attr(feature = "serde", serde(borrow))]
111    pub normalized_iana_ids: VarZeroVec<'data, str>,
112}
113
114icu_provider::data_struct!(
115    IanaNames<'_>,
116    #[cfg(feature = "datagen")]
117);