1use icu_provider::prelude::*;
12
13use super::data::MappingKind;
14use super::exception_helpers::{ExceptionBits, ExceptionSlot, SlotPresence};
15use crate::set::ClosureSink;
16use alloc::borrow::Cow;
17use core::fmt;
18#[cfg(any(feature = "serde", feature = "datagen"))]
19use core::ops::Range;
20use core::ptr;
21use zerovec::ule::AsULE;
22use zerovec::VarZeroVec;
23
/// First code point of the surrogate range. Delta values are stored as `char`s, and
/// values at or above this point are shifted so that they skip over the range.
const SURROGATES_START: u32 = 0xD800;
/// Number of code points in the surrogate range `0xD800..=0xDFFF`, i.e. the amount by
/// which stored delta values past the range are shifted.
const SURROGATES_LEN: u32 = 0xDFFF - SURROGATES_START + 1;
26
/// A list of case mapping exceptions, stored as a variable-length zero-copy vector of
/// [`ExceptionULE`] values and addressed by `u16` index.
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::exceptions))]
#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
pub struct CaseMapExceptions<'data> {
    /// The exception entries, in index order.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub exceptions: VarZeroVec<'data, ExceptionULE>,
}
45
46impl CaseMapExceptions<'_> {
47    pub fn get(&self, idx: u16) -> &ExceptionULE {
53        let exception = self.exceptions.get(idx.into());
54        debug_assert!(exception.is_some());
55
56        exception.unwrap_or(ExceptionULE::empty_exception())
57    }
58
59    #[cfg(any(feature = "serde", feature = "datagen"))]
60    pub(crate) fn validate(&self) -> Result<Range<u16>, &'static str> {
61        for exception in self.exceptions.iter() {
62            exception.validate()?;
63        }
64        u16::try_from(self.exceptions.len())
65            .map_err(|_| "Too many exceptions")
66            .map(|l| 0..l)
67    }
68}
#[zerovec::make_varule(ExceptionULE)]
/// A single case mapping exception in its sized (owned or borrowed) form; the
/// `make_varule` attribute generates the unsized [`ExceptionULE`] counterpart.
///
/// The contents are equivalently described by [`DecodedException`], which
/// [`DecodedException::encode`] turns into this type.
#[derive(PartialEq, Eq, Clone, Default, Debug)]
#[zerovec::skip_derive(Ord)]
#[cfg_attr(
    feature = "serde",
    derive(serde::Deserialize),
    zerovec::derive(Deserialize)
)]
#[cfg_attr(
    feature = "datagen",
    derive(serde::Serialize),
    zerovec::derive(Serialize)
)]
pub struct Exception<'a> {
    /// Flag bits for this exception (for instance whether the simple case delta is negative).
    pub bits: ExceptionBits,
    /// Bit set recording which slots are present in `data`.
    pub slot_presence: SlotPresence,
    /// The packed slot contents: one `char` per present char slot, in slot order,
    /// followed by the string data of the closure and full-mappings slots.
    pub data: Cow<'a, str>,
}
132
133impl ExceptionULE {
    /// Returns a `'static` exception backed by two zero bytes and an empty unsized tail.
    ///
    /// This is the fallback value handed out by [`CaseMapExceptions::get`] when an
    /// index is not present.
    #[inline]
    fn empty_exception() -> &'static Self {
        // Backing storage for the fixed-size prefix of the value.
        static EMPTY_BYTES: &[u8] = &[0, 0];
        // SAFETY: NOTE(review) — this relies on the layout generated by `make_varule`:
        // presumably `ExceptionULE` is a packed DST made of two one-byte fields followed
        // by an unsized string tail, with every bit pattern valid for the two bytes.
        // Confirm against the zerovec-generated type before changing this.
        unsafe {
            // The slice length (0) becomes the metadata of the fat pointer after the cast,
            // i.e. the length of the unsized tail, not of the whole value.
            let slice: *const [u8] = ptr::slice_from_raw_parts(EMPTY_BYTES.as_ptr(), 0);
            &*(slice as *const Self)
        }
    }
    /// Reports whether `slot` is marked as present in this exception's slot-presence bits.
    pub(crate) fn has_slot(&self, slot: ExceptionSlot) -> bool {
        self.slot_presence.has_slot(slot)
    }
149    pub(crate) fn get_char_slot(&self, slot: ExceptionSlot) -> Option<char> {
152        if slot >= ExceptionSlot::STRING_SLOTS_START {
153            return None;
154        }
155        let bit = 1 << (slot as u8);
156        if self.slot_presence.0 & bit == 0 {
158            return None;
159        }
160
161        let previous_slot_mask = bit - 1;
162        let previous_slots = self.slot_presence.0 & previous_slot_mask;
163        let slot_num = previous_slots.count_ones() as usize;
164        self.data.chars().nth(slot_num)
165    }
166
167    fn get_simple_case_delta(&self) -> Option<u32> {
174        let delta_ch = self.get_char_slot(ExceptionSlot::Delta)?;
175        let mut delta = u32::from(delta_ch);
176        if delta >= SURROGATES_START {
178            delta -= SURROGATES_LEN;
179        }
180        Some(delta)
181    }
182
183    pub(crate) fn get_simple_case_slot_for(&self, ch: char) -> Option<char> {
191        let delta = self.get_simple_case_delta()?;
192        let mut delta = i32::try_from(delta).ok()?;
193        if self.bits.negative_delta() {
194            delta = -delta;
195        }
196
197        let new_ch = i32::try_from(u32::from(ch)).ok()? + delta;
198
199        char::try_from(u32::try_from(new_ch).ok()?).ok()
200    }
201
202    fn get_stringy_data(&self) -> Option<&str> {
204        const CHAR_MASK: u8 = (1 << ExceptionSlot::STRING_SLOTS_START as u8) - 1;
205        let char_slot_count = (self.slot_presence.0 & CHAR_MASK).count_ones() as usize;
206        let mut chars = self.data.chars();
207        for _ in 0..char_slot_count {
208            let res = chars.next();
209            res?;
210        }
211        Some(chars.as_str())
212    }
213
214    fn get_stringy_slot(&self, slot: ExceptionSlot) -> Option<&str> {
217        debug_assert!(slot == ExceptionSlot::Closure || slot == ExceptionSlot::FullMappings);
218        let other_slot = if slot == ExceptionSlot::Closure {
219            ExceptionSlot::FullMappings
220        } else {
221            ExceptionSlot::Closure
222        };
223        if !self.slot_presence.has_slot(slot) {
224            return None;
225        }
226        let stringy_data = self.get_stringy_data()?;
227
228        if self.slot_presence.has_slot(other_slot) {
229            let mut chars = stringy_data.chars();
231            let length_char = chars.next()?;
233
234            let length = usize::try_from(u32::from(length_char)).unwrap_or(0);
235            let remaining_slice = chars.as_str();
237            if slot == ExceptionSlot::Closure {
239                remaining_slice.get(0..length)
240            } else {
241                remaining_slice.get(length..)
242            }
243        } else {
244            Some(stringy_data)
246        }
247    }
248
    /// Returns the string stored in the closure slot, or `None` if it is absent or unreadable.
    pub(crate) fn get_closure_slot(&self) -> Option<&str> {
        self.get_stringy_slot(ExceptionSlot::Closure)
    }
253
    /// Returns the raw, still-packed contents of the full-mappings slot (index chars
    /// followed by the mapping strings), or `None` if the slot is absent or unreadable.
    fn get_fullmappings_slot_data(&self) -> Option<&str> {
        self.get_stringy_slot(ExceptionSlot::FullMappings)
    }
260
261    pub(crate) fn get_fullmappings_slot_for_kind(&self, kind: MappingKind) -> Option<&str> {
263        let data = self.get_fullmappings_slot_data()?;
264
265        let mut chars = data.chars();
266        let i1 = usize::try_from(u32::from(chars.next()?)).ok()?;
268        let i2 = usize::try_from(u32::from(chars.next()?)).ok()?;
269        let i3 = usize::try_from(u32::from(chars.next()?)).ok()?;
270        let remaining_slice = chars.as_str();
271        match kind {
273            MappingKind::Lower => remaining_slice.get(..i1),
274            MappingKind::Fold => remaining_slice.get(i1..i2),
275            MappingKind::Upper => remaining_slice.get(i2..i3),
276            MappingKind::Title => remaining_slice.get(i3..),
277        }
278    }
279
280    fn get_all_fullmapping_slots(&self) -> Option<[Cow<'_, str>; 4]> {
282        Some([
283            self.get_fullmappings_slot_for_kind(MappingKind::Lower)?
284                .into(),
285            self.get_fullmappings_slot_for_kind(MappingKind::Fold)?
286                .into(),
287            self.get_fullmappings_slot_for_kind(MappingKind::Upper)?
288                .into(),
289            self.get_fullmappings_slot_for_kind(MappingKind::Title)?
290                .into(),
291        ])
292    }
293
294    #[inline]
297    pub(crate) fn slot_char_for_kind(&self, kind: MappingKind) -> Option<char> {
298        match kind {
299            MappingKind::Lower | MappingKind::Upper => self.get_char_slot(kind.into()),
300            MappingKind::Fold => self
301                .get_char_slot(ExceptionSlot::Fold)
302                .or_else(|| self.get_char_slot(ExceptionSlot::Lower)),
303            MappingKind::Title => self
304                .get_char_slot(ExceptionSlot::Title)
305                .or_else(|| self.get_char_slot(ExceptionSlot::Upper)),
306        }
307    }
308
309    pub(crate) fn add_full_and_closure_mappings<S: ClosureSink>(&self, set: &mut S) {
310        if let Some(full) = self.get_fullmappings_slot_for_kind(MappingKind::Fold) {
311            if !full.is_empty() {
312                set.add_string(full);
313            }
314        };
315        if let Some(closure) = self.get_closure_slot() {
316            for c in closure.chars() {
317                set.add_char(c);
318            }
319        };
320    }
321
322    pub fn decode(&self) -> DecodedException<'_> {
326        let bits = self.bits;
332        let lowercase = self.get_char_slot(ExceptionSlot::Lower);
333        let casefold = self.get_char_slot(ExceptionSlot::Fold);
334        let uppercase = self.get_char_slot(ExceptionSlot::Upper);
335        let titlecase = self.get_char_slot(ExceptionSlot::Title);
336        let simple_case_delta = self.get_simple_case_delta();
337        let closure = self.get_closure_slot().map(Into::into);
338        let full = self.get_all_fullmapping_slots();
339
340        DecodedException {
341            bits: ExceptionBits::from_unaligned(bits),
342            lowercase,
343            casefold,
344            uppercase,
345            titlecase,
346            simple_case_delta,
347            closure,
348            full,
349        }
350    }
351
352    #[cfg(any(feature = "serde", feature = "datagen"))]
353    pub(crate) fn validate(&self) -> Result<(), &'static str> {
354        if self.bits.double_width_slots() {
357            return Err("double-width-slots should not be used in ICU4C");
358        }
359
360        let decoded = self.decode();
362
363        for (slot, decoded_slot) in [
364            (ExceptionSlot::Lower, &decoded.lowercase),
365            (ExceptionSlot::Fold, &decoded.casefold),
366            (ExceptionSlot::Upper, &decoded.uppercase),
367            (ExceptionSlot::Title, &decoded.titlecase),
368        ] {
369            if self.has_slot(slot) && decoded_slot.is_none() {
370                return Err("Slot decoding failed");
372            }
373        }
374        if self.has_slot(ExceptionSlot::Delta) && decoded.simple_case_delta.is_none() {
375            return Err("Slot decoding failed");
377        }
378
379        if self.has_slot(ExceptionSlot::Closure) && decoded.closure.is_none() {
380            return Err("Slot decoding failed");
381        }
382
383        if self.has_slot(ExceptionSlot::FullMappings) {
384            if decoded.full.is_some() {
385                let data = self
386                    .get_fullmappings_slot_data()
387                    .ok_or("fullmappings slot doesn't parse")?;
388                let mut chars = data.chars();
389                let i1 = u32::from(chars.next().ok_or("fullmappings string too small")?);
390                let i2 = u32::from(chars.next().ok_or("fullmappings string too small")?);
391                let i3 = u32::from(chars.next().ok_or("fullmappings string too small")?);
392
393                if i2 < i1 || i3 < i2 {
394                    return Err("fullmappings string contains non-sequential indices");
395                }
396                let rest = chars.as_str();
397                let len = u32::try_from(rest.len()).map_err(|_| "len too large for u32")?;
398
399                if i1 > len || i2 > len || i3 > len {
400                    return Err("fullmappings string contains out-of-bounds indices");
401                }
402            } else {
403                return Err("Slot decoding failed");
404            }
405        }
406
407        Ok(())
408    }
409}
410
411impl fmt::Debug for ExceptionULE {
412    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
413        self.decode().fmt(f)
414    }
415}
416
/// The unpacked form of an exception, with one field per slot.
///
/// Produced by [`ExceptionULE::decode`] and packed again by [`DecodedException::encode`].
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct DecodedException<'a> {
    /// Flag bits for this exception.
    pub bits: ExceptionBits,
    /// Contents of the lowercase char slot, if present.
    pub lowercase: Option<char>,
    /// Contents of the case folding char slot, if present.
    pub casefold: Option<char>,
    /// Contents of the uppercase char slot, if present.
    pub uppercase: Option<char>,
    /// Contents of the titlecase char slot, if present.
    pub titlecase: Option<char>,
    /// Magnitude of the simple case delta, if present; whether it is subtracted
    /// rather than added is recorded in `bits`.
    pub simple_case_delta: Option<u32>,
    /// Contents of the closure slot, if present.
    pub closure: Option<Cow<'a, str>>,
    /// The full mappings, if present, in the order lower, fold, upper, title.
    pub full: Option<[Cow<'a, str>; 4]>,
}
446
447impl DecodedException<'_> {
448    pub fn encode(&self) -> Exception<'static> {
450        let bits = self.bits;
451        let mut slot_presence = SlotPresence(0);
452        let mut data = alloc::string::String::new();
453        if let Some(lowercase) = self.lowercase {
454            slot_presence.add_slot(ExceptionSlot::Lower);
455            data.push(lowercase)
456        }
457        if let Some(casefold) = self.casefold {
458            slot_presence.add_slot(ExceptionSlot::Fold);
459            data.push(casefold)
460        }
461        if let Some(uppercase) = self.uppercase {
462            slot_presence.add_slot(ExceptionSlot::Upper);
463            data.push(uppercase)
464        }
465        if let Some(titlecase) = self.titlecase {
466            slot_presence.add_slot(ExceptionSlot::Title);
467            data.push(titlecase)
468        }
469        if let Some(mut simple_case_delta) = self.simple_case_delta {
470            slot_presence.add_slot(ExceptionSlot::Delta);
471
472            if simple_case_delta >= SURROGATES_START {
473                simple_case_delta += SURROGATES_LEN;
474            }
475            let simple_case_delta = char::try_from(simple_case_delta).unwrap_or('\0');
476            data.push(simple_case_delta)
477        }
478
479        if let Some(ref closure) = self.closure {
480            slot_presence.add_slot(ExceptionSlot::Closure);
481            if self.full.is_some() {
482                debug_assert!(
484                    closure.len() < 0xD800,
485                    "Found overlarge closure value when encoding exception"
486                );
487                let len_char = u32::try_from(closure.len())
488                    .ok()
489                    .and_then(|c| char::try_from(c).ok())
490                    .unwrap_or('\0');
491                data.push(len_char);
492            }
493            data.push_str(closure);
494        }
495        if let Some(ref full) = self.full {
496            slot_presence.add_slot(ExceptionSlot::FullMappings);
497            let mut idx = 0;
498            for mapping in full.iter().take(3) {
500                idx += mapping.len();
501                data.push(char::try_from(u32::try_from(idx).unwrap_or(0)).unwrap_or('\0'));
502            }
503            for mapping in full {
504                data.push_str(mapping);
505            }
506        }
507        Exception {
508            bits,
509            slot_presence,
510            data: data.into(),
511        }
512    }
513
514    }
517
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `exception`, converts it to its unsized boxed form, decodes it again,
    /// and asserts that the result equals the input.
    fn test_roundtrip_once(exception: DecodedException) {
        let encoded = exception.encode();
        let encoded = zerovec::ule::encode_varule_to_box(&encoded);
        let decoded = encoded.decode();
        assert_eq!(decoded, exception);
    }

    /// Round-trips a range of slot combinations, including multi-byte chars, a delta
    /// past the surrogate range, and both string slots present at once.
    #[test]
    fn test_roundtrip() {
        test_roundtrip_once(DecodedException {
            lowercase: Some('ø'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            lowercase: Some('ø'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            simple_case_delta: Some(0xE999),
            closure: Some("hello world".into()),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            simple_case_delta: Some(10),
            closure: Some("hello world".into()),
            full: Some(["你好世界".into(), "".into(), "hi".into(), "å".into()]),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            closure: Some("hello world".into()),
            full: Some(["aa".into(), "ț".into(), "".into(), "å".into()]),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            full: Some(["你好世界".into(), "".into(), "hi".into(), "å".into()]),
            ..Default::default()
        });
    }
}