icu_time/provider/iana.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! 🚧 \[Unstable\] Data structs for IANA time zone identifier mappings
6//!
7//! <div class="stab unstable">
8//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
9//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
10//! to be stable, their Rust representation might not be. Use with caution.
11//! </div>
12//!
13//! Read more about data providers: [`icu_provider`]
14
15use crate::TimeZone;
16use icu_provider::prelude::*;
17use zerotrie::ZeroAsciiIgnoreCaseTrie;
18use zerovec::{VarZeroVec, ZeroVec};
19
20/// [`IanaToBcp47Map`]'s trie cannot handle differently-cased prefixes, like `Mexico/BajaSur` and `MET`.
21///
22/// Therefore, any ID that is not of the shape `{region}/{city}` gets prefixed with this byte
23/// inside the trie.
24///
25/// During lookup, if the input is not of the shape `{region}/{city}`, the trie cursor has to be advanced over
26/// this byte before the identifier itself is matched.
27pub const NON_REGION_CITY_PREFIX: u8 = b'_';
28
29icu_provider::data_marker!(
30 /// Data marker for [`IanaToBcp47Map`], the IANA-to-BCP-47 time zone identifier mapping.
31 ///
32 /// This marker uses a checksum to ensure consistency with [`TimezoneIdentifiersIanaExtendedV1`].
33 TimezoneIdentifiersIanaCoreV1,
34 "timezone/identifiers/iana/core/v1",
35 IanaToBcp47Map<'static>,
36 is_singleton = true,
37 has_checksum = true,
38);
39
40icu_provider::data_marker!(
41 /// Data marker for [`IanaNames`], the list of normalized IANA time zone identifiers.
42 ///
43 /// This marker uses a checksum to ensure consistency with [`TimezoneIdentifiersIanaCoreV1`].
44 TimezoneIdentifiersIanaExtendedV1,
45 "timezone/identifiers/iana/extended/v1",
46 IanaNames<'static>,
47 is_singleton = true,
48 has_checksum = true,
49);
50
51/// A mapping from normal-case IANA time zone identifiers to BCP-47 time zone identifiers.
52///
53/// Multiple IANA time zone IDs can map to the same BCP-47 time zone ID.
///
/// The `map` field stores, for each IANA identifier, an index into the `bcp47_ids` field
/// together with a flag telling whether that IANA identifier is canonical.
54///
55/// <div class="stab unstable">
56/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
57/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
58/// to be stable, their Rust representation might not be. Use with caution.
59/// </div>
60#[derive(Debug, Clone, PartialEq, zerofrom::ZeroFrom, yoke::Yokeable)]
61#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
62#[cfg_attr(feature = "datagen", databake(path = icu_time::provider::iana))]
63#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
64pub struct IanaToBcp47Map<'data> {
65 /// A map from normal-case IANA time zone identifiers to indexes of BCP-47 time zone
66 /// identifiers along with a canonical flag.
 ///
 /// Identifiers that are not of the shape `{region}/{city}` are stored with a leading
 /// [`NON_REGION_CITY_PREFIX`] byte.
67 ///
68 /// The `usize` values stored in the trie have the following form:
69 ///
70 /// - Lowest bit: 1 if canonical, 0 if not canonical
71 /// - All remaining bits: index into `bcp47_ids`
72 ///
73 /// For example, in CLDR 44, `"Africa/Abidjan"` has value 221, which means it is canonical
74 /// (low bit is 1 == odd number) and the index into `bcp47_ids` is 110 (221 >> 1).
75 #[cfg_attr(feature = "serde", serde(borrow))]
76 pub map: ZeroAsciiIgnoreCaseTrie<ZeroVec<'data, u8>>,
77 /// A list of BCP-47 time zone identifiers, sorted by canonical IANA ID.
 ///
 /// This is the list that the upper bits of the values in `map` index into.
78 #[cfg_attr(feature = "serde", serde(borrow))]
79 // Size note: this is 9739B as `ZeroVec<TimeZone>` (`ZeroVec<TinyStr8>`)
80 // and 9335B as `VarZeroVec<str>`
81 pub bcp47_ids: ZeroVec<'data, TimeZone>,
82}
83
// Declares `IanaToBcp47Map` as an `icu_provider` data struct.
// NOTE(review): the `datagen` cfg attribute presumably gates the export-only parts of the
// expansion; confirm against the `icu_provider::data_struct!` documentation.
84icu_provider::data_struct!(
85 IanaToBcp47Map<'_>,
86 #[cfg(feature = "datagen")]
87);
88
89/// The list of normalized IANA time zone identifiers, with the canonical identifiers first.
90///
91/// The BCP-47 time zone ID maps to the default IANA time zone ID according to the CLDR data.
/// See the `normalized_iana_ids` field for how the list lines up with
/// [`IanaToBcp47Map::bcp47_ids`].
92///
93/// <div class="stab unstable">
94/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
95/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
96/// to be stable, their Rust representation might not be. Use with caution.
97/// </div>
98#[derive(Debug, Clone, PartialEq, zerofrom::ZeroFrom, yoke::Yokeable)]
99#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
100#[cfg_attr(feature = "datagen", databake(path = icu_time::provider::iana))]
101#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
102#[yoke(prove_covariance_manually)]
103pub struct IanaNames<'data> {
104 /// The list of all normalized IANA identifiers.
105 ///
106 /// The first `bcp47_ids.len()` identifiers are canonical for
107 /// the BCP-47 IDs in [`IanaToBcp47Map::bcp47_ids`] at the same index.
108 ///
109 /// The remaining non-canonical identifiers are sorted in ascending lowercase order.
110 #[cfg_attr(feature = "serde", serde(borrow))]
111 pub normalized_iana_ids: VarZeroVec<'data, str>,
112}
113
// Declares `IanaNames` as an `icu_provider` data struct.
// NOTE(review): the `datagen` cfg attribute presumably gates the export-only parts of the
// expansion; confirm against the `icu_provider::data_struct!` documentation.
114icu_provider::data_struct!(
115 IanaNames<'_>,
116 #[cfg(feature = "datagen")]
117);