1use icu_provider::prelude::*;
12
13use super::data::MappingKind;
14use super::exception_helpers::{ExceptionBits, ExceptionSlot, SlotPresence};
15use crate::set::ClosureSink;
16use alloc::borrow::Cow;
17use core::fmt;
18#[cfg(any(feature = "serde", feature = "datagen"))]
19use core::ops::Range;
20use core::ptr;
21use zerovec::ule::AsULE;
22use zerovec::VarZeroVec;
23
/// First code point of the surrogate range, i.e. the first `u32` above the BMP scalar values
/// that cannot be stored in a `char`.
const SURROGATES_START: u32 = 0xD800;
/// Number of code points in the surrogate range `0xD800..=0xDFFF`.
const SURROGATES_LEN: u32 = 0xE000 - SURROGATES_START;
26
27#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
37#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
38#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::exceptions))]
39#[derive(Debug, Eq, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
40pub struct CaseMapExceptions<'data> {
41 #[cfg_attr(feature = "serde", serde(borrow))]
42 pub exceptions: VarZeroVec<'data, ExceptionULE>,
44}
45
46impl CaseMapExceptions<'_> {
47 pub fn get(&self, idx: u16) -> &ExceptionULE {
53 let exception = self.exceptions.get(idx.into());
54 debug_assert!(exception.is_some());
55
56 exception.unwrap_or(ExceptionULE::empty_exception())
57 }
58
59 #[cfg(any(feature = "serde", feature = "datagen"))]
60 pub(crate) fn validate(&self) -> Result<Range<u16>, &'static str> {
61 for exception in self.exceptions.iter() {
62 exception.validate()?;
63 }
64 u16::try_from(self.exceptions.len())
65 .map_err(|_| "Too many exceptions")
66 .map(|l| 0..l)
67 }
68}
69#[zerovec::make_varule(ExceptionULE)]
90#[derive(PartialEq, Eq, Clone, Default, Debug)]
91#[zerovec::skip_derive(Ord)]
92#[cfg_attr(
93 feature = "serde",
94 derive(serde::Deserialize),
95 zerovec::derive(Deserialize)
96)]
97#[cfg_attr(
98 feature = "datagen",
99 derive(serde::Serialize),
100 zerovec::derive(Serialize)
101)]
102pub struct Exception<'a> {
103 pub bits: ExceptionBits,
107 pub slot_presence: SlotPresence,
111 pub data: Cow<'a, str>,
131}
132
133impl ExceptionULE {
134 #[inline]
135 fn empty_exception() -> &'static Self {
136 static EMPTY_BYTES: &[u8] = &[0, 0];
137 unsafe {
142 let slice: *const [u8] = ptr::slice_from_raw_parts(EMPTY_BYTES.as_ptr(), 0);
143 &*(slice as *const Self)
144 }
145 }
146 pub(crate) fn has_slot(&self, slot: ExceptionSlot) -> bool {
147 self.slot_presence.has_slot(slot)
148 }
149 pub(crate) fn get_char_slot(&self, slot: ExceptionSlot) -> Option<char> {
152 if slot >= ExceptionSlot::STRING_SLOTS_START {
153 return None;
154 }
155 let bit = 1 << (slot as u8);
156 if self.slot_presence.0 & bit == 0 {
158 return None;
159 }
160
161 let previous_slot_mask = bit - 1;
162 let previous_slots = self.slot_presence.0 & previous_slot_mask;
163 let slot_num = previous_slots.count_ones() as usize;
164 self.data.chars().nth(slot_num)
165 }
166
167 fn get_simple_case_delta(&self) -> Option<u32> {
174 let delta_ch = self.get_char_slot(ExceptionSlot::Delta)?;
175 let mut delta = u32::from(delta_ch);
176 if delta >= SURROGATES_START {
178 delta -= SURROGATES_LEN;
179 }
180 Some(delta)
181 }
182
183 pub(crate) fn get_simple_case_slot_for(&self, ch: char) -> Option<char> {
191 let delta = self.get_simple_case_delta()?;
192 let mut delta = i32::try_from(delta).ok()?;
193 if self.bits.negative_delta() {
194 delta = -delta;
195 }
196
197 let new_ch = i32::try_from(u32::from(ch)).ok()? + delta;
198
199 char::try_from(u32::try_from(new_ch).ok()?).ok()
200 }
201
202 fn get_stringy_data(&self) -> Option<&str> {
204 const CHAR_MASK: u8 = (1 << ExceptionSlot::STRING_SLOTS_START as u8) - 1;
205 let char_slot_count = (self.slot_presence.0 & CHAR_MASK).count_ones() as usize;
206 let mut chars = self.data.chars();
207 for _ in 0..char_slot_count {
208 let res = chars.next();
209 res?;
210 }
211 Some(chars.as_str())
212 }
213
214 fn get_stringy_slot(&self, slot: ExceptionSlot) -> Option<&str> {
217 debug_assert!(slot == ExceptionSlot::Closure || slot == ExceptionSlot::FullMappings);
218 let other_slot = if slot == ExceptionSlot::Closure {
219 ExceptionSlot::FullMappings
220 } else {
221 ExceptionSlot::Closure
222 };
223 if !self.slot_presence.has_slot(slot) {
224 return None;
225 }
226 let stringy_data = self.get_stringy_data()?;
227
228 if self.slot_presence.has_slot(other_slot) {
229 let mut chars = stringy_data.chars();
231 let length_char = chars.next()?;
233
234 let length = usize::try_from(u32::from(length_char)).unwrap_or(0);
235 let remaining_slice = chars.as_str();
237 if slot == ExceptionSlot::Closure {
239 remaining_slice.get(0..length)
240 } else {
241 remaining_slice.get(length..)
242 }
243 } else {
244 Some(stringy_data)
246 }
247 }
248
249 pub(crate) fn get_closure_slot(&self) -> Option<&str> {
251 self.get_stringy_slot(ExceptionSlot::Closure)
252 }
253
254 fn get_fullmappings_slot_data(&self) -> Option<&str> {
258 self.get_stringy_slot(ExceptionSlot::FullMappings)
259 }
260
261 pub(crate) fn get_fullmappings_slot_for_kind(&self, kind: MappingKind) -> Option<&str> {
263 let data = self.get_fullmappings_slot_data()?;
264
265 let mut chars = data.chars();
266 let i1 = usize::try_from(u32::from(chars.next()?)).ok()?;
268 let i2 = usize::try_from(u32::from(chars.next()?)).ok()?;
269 let i3 = usize::try_from(u32::from(chars.next()?)).ok()?;
270 let remaining_slice = chars.as_str();
271 match kind {
273 MappingKind::Lower => remaining_slice.get(..i1),
274 MappingKind::Fold => remaining_slice.get(i1..i2),
275 MappingKind::Upper => remaining_slice.get(i2..i3),
276 MappingKind::Title => remaining_slice.get(i3..),
277 }
278 }
279
280 fn get_all_fullmapping_slots(&self) -> Option<[Cow<'_, str>; 4]> {
282 Some([
283 self.get_fullmappings_slot_for_kind(MappingKind::Lower)?
284 .into(),
285 self.get_fullmappings_slot_for_kind(MappingKind::Fold)?
286 .into(),
287 self.get_fullmappings_slot_for_kind(MappingKind::Upper)?
288 .into(),
289 self.get_fullmappings_slot_for_kind(MappingKind::Title)?
290 .into(),
291 ])
292 }
293
294 #[inline]
297 pub(crate) fn slot_char_for_kind(&self, kind: MappingKind) -> Option<char> {
298 match kind {
299 MappingKind::Lower | MappingKind::Upper => self.get_char_slot(kind.into()),
300 MappingKind::Fold => self
301 .get_char_slot(ExceptionSlot::Fold)
302 .or_else(|| self.get_char_slot(ExceptionSlot::Lower)),
303 MappingKind::Title => self
304 .get_char_slot(ExceptionSlot::Title)
305 .or_else(|| self.get_char_slot(ExceptionSlot::Upper)),
306 }
307 }
308
309 pub(crate) fn add_full_and_closure_mappings<S: ClosureSink>(&self, set: &mut S) {
310 if let Some(full) = self.get_fullmappings_slot_for_kind(MappingKind::Fold) {
311 if !full.is_empty() {
312 set.add_string(full);
313 }
314 };
315 if let Some(closure) = self.get_closure_slot() {
316 for c in closure.chars() {
317 set.add_char(c);
318 }
319 };
320 }
321
322 pub fn decode(&self) -> DecodedException<'_> {
326 let bits = self.bits;
332 let lowercase = self.get_char_slot(ExceptionSlot::Lower);
333 let casefold = self.get_char_slot(ExceptionSlot::Fold);
334 let uppercase = self.get_char_slot(ExceptionSlot::Upper);
335 let titlecase = self.get_char_slot(ExceptionSlot::Title);
336 let simple_case_delta = self.get_simple_case_delta();
337 let closure = self.get_closure_slot().map(Into::into);
338 let full = self.get_all_fullmapping_slots();
339
340 DecodedException {
341 bits: ExceptionBits::from_unaligned(bits),
342 lowercase,
343 casefold,
344 uppercase,
345 titlecase,
346 simple_case_delta,
347 closure,
348 full,
349 }
350 }
351
352 #[cfg(any(feature = "serde", feature = "datagen"))]
353 pub(crate) fn validate(&self) -> Result<(), &'static str> {
354 if self.bits.double_width_slots() {
357 return Err("double-width-slots should not be used in ICU4C");
358 }
359
360 let decoded = self.decode();
362
363 for (slot, decoded_slot) in [
364 (ExceptionSlot::Lower, &decoded.lowercase),
365 (ExceptionSlot::Fold, &decoded.casefold),
366 (ExceptionSlot::Upper, &decoded.uppercase),
367 (ExceptionSlot::Title, &decoded.titlecase),
368 ] {
369 if self.has_slot(slot) && decoded_slot.is_none() {
370 return Err("Slot decoding failed");
372 }
373 }
374 if self.has_slot(ExceptionSlot::Delta) && decoded.simple_case_delta.is_none() {
375 return Err("Slot decoding failed");
377 }
378
379 if self.has_slot(ExceptionSlot::Closure) && decoded.closure.is_none() {
380 return Err("Slot decoding failed");
381 }
382
383 if self.has_slot(ExceptionSlot::FullMappings) {
384 if decoded.full.is_some() {
385 let data = self
386 .get_fullmappings_slot_data()
387 .ok_or("fullmappings slot doesn't parse")?;
388 let mut chars = data.chars();
389 let i1 = u32::from(chars.next().ok_or("fullmappings string too small")?);
390 let i2 = u32::from(chars.next().ok_or("fullmappings string too small")?);
391 let i3 = u32::from(chars.next().ok_or("fullmappings string too small")?);
392
393 if i2 < i1 || i3 < i2 {
394 return Err("fullmappings string contains non-sequential indices");
395 }
396 let rest = chars.as_str();
397 let len = u32::try_from(rest.len()).map_err(|_| "len too large for u32")?;
398
399 if i1 > len || i2 > len || i3 > len {
400 return Err("fullmappings string contains out-of-bounds indices");
401 }
402 } else {
403 return Err("Slot decoding failed");
404 }
405 }
406
407 Ok(())
408 }
409}
410
411impl fmt::Debug for ExceptionULE {
412 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
413 self.decode().fmt(f)
414 }
415}
416
417#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
426#[cfg_attr(feature = "datagen", derive(serde::Serialize))]
427#[derive(Debug, Clone, PartialEq, Eq, Default)]
428pub struct DecodedException<'a> {
429 pub bits: ExceptionBits,
431 pub lowercase: Option<char>,
433 pub casefold: Option<char>,
435 pub uppercase: Option<char>,
437 pub titlecase: Option<char>,
439 pub simple_case_delta: Option<u32>,
441 pub closure: Option<Cow<'a, str>>,
443 pub full: Option<[Cow<'a, str>; 4]>,
445}
446
447impl DecodedException<'_> {
448 pub fn encode(&self) -> Exception<'static> {
450 let bits = self.bits;
451 let mut slot_presence = SlotPresence(0);
452 let mut data = alloc::string::String::new();
453 if let Some(lowercase) = self.lowercase {
454 slot_presence.add_slot(ExceptionSlot::Lower);
455 data.push(lowercase)
456 }
457 if let Some(casefold) = self.casefold {
458 slot_presence.add_slot(ExceptionSlot::Fold);
459 data.push(casefold)
460 }
461 if let Some(uppercase) = self.uppercase {
462 slot_presence.add_slot(ExceptionSlot::Upper);
463 data.push(uppercase)
464 }
465 if let Some(titlecase) = self.titlecase {
466 slot_presence.add_slot(ExceptionSlot::Title);
467 data.push(titlecase)
468 }
469 if let Some(mut simple_case_delta) = self.simple_case_delta {
470 slot_presence.add_slot(ExceptionSlot::Delta);
471
472 if simple_case_delta >= SURROGATES_START {
473 simple_case_delta += SURROGATES_LEN;
474 }
475 let simple_case_delta = char::try_from(simple_case_delta).unwrap_or('\0');
476 data.push(simple_case_delta)
477 }
478
479 if let Some(ref closure) = self.closure {
480 slot_presence.add_slot(ExceptionSlot::Closure);
481 if self.full.is_some() {
482 debug_assert!(
484 closure.len() < 0xD800,
485 "Found overlarge closure value when encoding exception"
486 );
487 let len_char = u32::try_from(closure.len())
488 .ok()
489 .and_then(|c| char::try_from(c).ok())
490 .unwrap_or('\0');
491 data.push(len_char);
492 }
493 data.push_str(closure);
494 }
495 if let Some(ref full) = self.full {
496 slot_presence.add_slot(ExceptionSlot::FullMappings);
497 let mut idx = 0;
498 for mapping in full.iter().take(3) {
500 idx += mapping.len();
501 data.push(char::try_from(u32::try_from(idx).unwrap_or(0)).unwrap_or('\0'));
502 }
503 for mapping in full {
504 data.push_str(mapping);
505 }
506 }
507 Exception {
508 bits,
509 slot_presence,
510 data: data.into(),
511 }
512 }
513
514 }
517
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `exception`, converts it to its boxed `ExceptionULE` form, decodes it again
    /// and asserts that the result equals the input.
    fn test_roundtrip_once(exception: DecodedException) {
        let encoded = exception.encode();
        let encoded = zerovec::ule::encode_varule_to_box(&encoded);
        let decoded = encoded.decode();
        assert_eq!(decoded, exception);
    }

    /// Round-trips exceptions covering single-character slots, a delta above the surrogate
    /// range, a closure on its own, and full mappings with and without a closure. The
    /// non-ASCII fixtures exercise multi-byte UTF-8 offsets.
    #[test]
    fn test_roundtrip() {
        test_roundtrip_once(DecodedException {
            lowercase: Some('ø'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            lowercase: Some('ø'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            titlecase: Some('X'),
            simple_case_delta: Some(0xE999),
            closure: Some("hello world".into()),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            simple_case_delta: Some(10),
            closure: Some("hello world".into()),
            full: Some(["你好世界".into(), "".into(), "hi".into(), "å".into()]),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            closure: Some("hello world".into()),
            full: Some(["aa".into(), "ț".into(), "".into(), "å".into()]),
            ..Default::default()
        });
        test_roundtrip_once(DecodedException {
            full: Some(["你好世界".into(), "".into(), "hi".into(), "å".into()]),
            ..Default::default()
        });
    }
}