1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! This module contains various types for the header part of casemapping exception data
//!
//! This is both used in datagen to decode ICU4C's data, and natively in ICU4X's
//! own data model.
//!
//! [`ExceptionBits`] is the bag of bits associated with exceptions, and [`SlotPresence`]
//! marks the presence or absence of various "slots" in a given exception.
//!
//! The `exceptions_builder` module of this crate handles decoding ICU4C data using the exception
//! header, and [`crate::provider::exceptions`] handles.

use crate::provider::data::{DotType, MappingKind};
use zerovec::ule::{AsULE, ULE};

/// A bunch of bits associated with each exception.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Copy, Clone, PartialEq, Eq, Debug, Default)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
pub struct ExceptionBits {
    /// Whether or not the slots are double-width.
    ///
    /// Unused in ICU4X
    pub double_width_slots: bool,
    /// There is no simple casefolding, even if there is a simple lowercase mapping
    pub no_simple_case_folding: bool,
    /// The delta stored in the `Delta` slot is negative
    pub negative_delta: bool,
    /// If the character is case sensitive
    pub is_sensitive: bool,
    /// The dot type of the character
    pub dot_type: DotType,
    /// If the character has conditional special casing
    pub has_conditional_special: bool,
    /// If the character has conditional case folding
    pub has_conditional_fold: bool,
}

impl ExceptionBits {
    /// Extract from the upper half of an ICU4C-format u16
    pub(crate) fn from_integer(int: u8) -> Self {
        let ule = ExceptionBitsULE(int);
        let double_width_slots = ule.double_width_slots();
        let no_simple_case_folding = ule.no_simple_case_folding();
        let negative_delta = ule.negative_delta();
        let is_sensitive = ule.is_sensitive();
        let has_conditional_special = ule.has_conditional_special();
        let has_conditional_fold = ule.has_conditional_fold();
        let dot_type = ule.dot_type();

        Self {
            double_width_slots,
            no_simple_case_folding,
            negative_delta,
            is_sensitive,
            dot_type,
            has_conditional_special,
            has_conditional_fold,
        }
    }

    /// Convert to an ICU4C-format upper half of u16
    pub(crate) fn to_integer(self) -> u8 {
        let mut int = 0;
        let dot_data = (self.dot_type as u8) << ExceptionBitsULE::DOT_SHIFT;
        int |= dot_data;

        if self.double_width_slots {
            int |= ExceptionBitsULE::DOUBLE_SLOTS_FLAG
        }
        if self.no_simple_case_folding {
            int |= ExceptionBitsULE::NO_SIMPLE_CASE_FOLDING_FLAG
        }
        if self.negative_delta {
            int |= ExceptionBitsULE::NEGATIVE_DELTA_FLAG
        }
        if self.is_sensitive {
            int |= ExceptionBitsULE::SENSITIVE_FLAG
        }
        if self.has_conditional_special {
            int |= ExceptionBitsULE::CONDITIONAL_SPECIAL_FLAG
        }
        if self.has_conditional_fold {
            int |= ExceptionBitsULE::CONDITIONAL_FOLD_FLAG
        }
        int
    }
}

/// Packed slot presence marker
///
/// All bits are valid, though bit 4 is unused and reserved
///
/// Bits:
///
/// ```text
///               0: Lowercase mapping (code point)
///               1: Case folding (code point)
///               2: Uppercase mapping (code point)
///               3: Titlecase mapping (code point)
///               4: Delta to simple case mapping (code point) (sign stored separately)
///               5: RESERVED
///               6: Closure mappings (string; see below)
///               7: Full mappings (strings; see below)
/// ```
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Copy, Clone, PartialEq, Eq, ULE, Debug, Default)]
#[repr(transparent)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
pub struct SlotPresence(pub u8);

impl SlotPresence {
    pub(crate) fn add_slot(&mut self, slot: ExceptionSlot) {
        self.0 |= 1 << slot as u8;
    }
    pub(crate) fn has_slot(self, slot: ExceptionSlot) -> bool {
        let bit = 1 << (slot as u8);
        self.0 & bit != 0
    }
}

/// The bitflags on an exception header.
///
/// Format from icu4c, documented in casepropsbuilder.cpp, shifted 8 bits since ICU4C has this packed
/// alongside a SlotPresence
///
/// ```text
///            0  Double-width slots. If set, then each optional slot is stored as two
///               elements of the array (high and low halves of 32-bit values) instead of
///               a single element.
///            1  Has no simple case folding, even if there is a simple lowercase mapping
///           2  The value in the delta slot is negative
///           3  Is case-sensitive (not exposed)
///       4..5  Dot type
///           6  Has conditional special casing
///           7  Has conditional case folding
/// ```
///
/// All bits are valid, though in ICU4X data bits 0 and 2 are not used
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Copy, Clone, PartialEq, Eq, ULE, Debug)]
#[repr(transparent)]
pub struct ExceptionBitsULE(pub u8);

impl ExceptionBitsULE {
    const DOUBLE_SLOTS_FLAG: u8 = 0x1;

    const NO_SIMPLE_CASE_FOLDING_FLAG: u8 = 0x2;
    const NEGATIVE_DELTA_FLAG: u8 = 0x4;
    const SENSITIVE_FLAG: u8 = 0x8;

    const DOT_SHIFT: u8 = 4;

    const CONDITIONAL_SPECIAL_FLAG: u8 = 0x40;
    const CONDITIONAL_FOLD_FLAG: u8 = 0x80;
}

impl ExceptionBitsULE {
    /// Whether or not the slots are double-width.
    ///
    /// Unused in ICU4X

    pub fn double_width_slots(self) -> bool {
        self.0 & Self::DOUBLE_SLOTS_FLAG != 0
    }

    /// There is no simple casefolding, even if there is a simple lowercase mapping
    pub fn no_simple_case_folding(self) -> bool {
        self.0 & Self::NO_SIMPLE_CASE_FOLDING_FLAG != 0
    }

    /// The delta stored in the `Delta` slot is negative
    pub fn negative_delta(self) -> bool {
        self.0 & Self::NEGATIVE_DELTA_FLAG != 0
    }

    /// If the character is case sensitive
    pub fn is_sensitive(self) -> bool {
        self.0 & Self::SENSITIVE_FLAG != 0
    }

    /// If the character has conditional special casing
    pub fn has_conditional_special(self) -> bool {
        self.0 & Self::CONDITIONAL_SPECIAL_FLAG != 0
    }

    /// If the character has conditional case folding
    pub fn has_conditional_fold(self) -> bool {
        self.0 & Self::CONDITIONAL_FOLD_FLAG != 0
    }

    /// The dot type of the character
    pub fn dot_type(self) -> DotType {
        DotType::from_masked_bits((u16::from(self.0 >> Self::DOT_SHIFT)) & DotType::DOT_MASK)
    }
}

impl AsULE for ExceptionBits {
    type ULE = ExceptionBitsULE;
    fn from_unaligned(u: ExceptionBitsULE) -> Self {
        ExceptionBits::from_integer(u.0)
    }

    fn to_unaligned(self) -> ExceptionBitsULE {
        ExceptionBitsULE(self.to_integer())
    }
}

impl AsULE for SlotPresence {
    type ULE = SlotPresence;
    fn from_unaligned(u: Self) -> Self {
        u
    }

    fn to_unaligned(self) -> Self {
        self
    }
}

/// The different slots that may be present in slot-based exception data
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Copy, Clone, Debug, PartialOrd, Ord, PartialEq, Eq)]
pub(crate) enum ExceptionSlot {
    /// Lowercase mapping
    Lower = 0,
    /// Case folding
    Fold = 1,
    /// Uppercase mapping
    Upper = 2,
    /// Titlecase mapping
    Title = 3,
    /// The delta to the simple case folding
    Delta = 4,
    // Slot 5 is reserved
    /// The closure set
    Closure = 6,
    /// The four full-mappings
    FullMappings = 7,
}

impl ExceptionSlot {
    /// Where the string slots begin
    pub(crate) const STRING_SLOTS_START: Self = Self::Closure;
}

impl From<MappingKind> for ExceptionSlot {
    fn from(full: MappingKind) -> Self {
        match full {
            MappingKind::Lower => Self::Lower,
            MappingKind::Fold => Self::Fold,
            MappingKind::Upper => Self::Upper,
            MappingKind::Title => Self::Title,
        }
    }
}