1#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
20
21use icu_collections::char16trie::Char16TrieIterator;
22use icu_collections::codepointtrie::CodePointTrie;
23use icu_provider::prelude::*;
24use zerovec::ule::AsULE;
25use zerovec::ZeroVec;
26use zerovec::{zeroslice, ZeroSlice};
27
28use crate::elements::CollationElement;
29use crate::elements::CollationElement32;
30use crate::elements::Tag;
31use crate::elements::EMPTY_U16;
32use crate::elements::FFFD_CE;
33use crate::elements::FFFD_CE32;
34use crate::elements::FFFD_CE32_VALUE;
35use crate::elements::FFFD_CE_VALUE;
36use crate::elements::NO_CE_PRIMARY;
37use crate::preferences::CollationCaseFirst;
38
39use crate::options::MaxVariable;
40
/// Unit type that the `make_provider!` and `impl_collation_*_v1!` macros in
/// this file are applied to.
///
/// Only compiled when the `compiled_data` feature is enabled.
#[cfg(feature = "compiled_data")]
#[derive(Debug)]
pub struct Baked;
51
// Anonymous const scope: the glob import and the `icu` alias module below stay
// local to this block while the data macros are expanded for `Baked`.
#[cfg(feature = "compiled_data")]
#[allow(unused_imports)]
const _: () = {
    use icu_collator_data::*;

    // Aliases for this crate and two sibling crates under `icu::…` paths.
    // NOTE(review): presumably these are the paths the expanded macros refer
    // to — confirm against `icu_collator_data`.
    pub mod icu {
        pub use crate as collator;
        pub use icu_collections as collections;
        pub use icu_locale as locale;
    }

    make_provider!(Baked);
    impl_collation_root_v1!(Baked);
    impl_collation_tailoring_v1!(Baked);
    impl_collation_diacritics_v1!(Baked);
    impl_collation_jamo_v1!(Baked);
    impl_collation_metadata_v1!(Baked);
    impl_collation_special_primaries_v1!(Baked);
    impl_collation_reordering_v1!(Baked);
};
70
71const SCRIPT_FALLBACK: icu_provider::fallback::LocaleFallbackConfig = {
72 let mut c = icu_provider::fallback::LocaleFallbackConfig::default();
73 c.priority = icu_provider::fallback::LocaleFallbackPriority::Script;
74 c
75};
76
77icu_provider::data_marker!(
78 CollationRootV1,
80 "collation/root/v1",
81 CollationData<'static>,
82 is_singleton = true,
83);
84icu_provider::data_marker!(
85 CollationTailoringV1,
87 "collation/tailoring/v1",
88 CollationData<'static>,
89 fallback_config = SCRIPT_FALLBACK,
90 #[cfg(feature = "datagen")]
91 attributes_domain = "collator",
92);
93icu_provider::data_marker!(
94 CollationDiacriticsV1,
96 "collation/diacritics/v1",
97 CollationDiacritics<'static>,
98 fallback_config = SCRIPT_FALLBACK,
99 #[cfg(feature = "datagen")]
100 attributes_domain = "collator",
101);
102icu_provider::data_marker!(
103 CollationJamoV1,
105 "collation/jamo/v1",
106 CollationJamo<'static>,
107 is_singleton = true,
108);
109icu_provider::data_marker!(
110 CollationReorderingV1,
112 "collation/reordering/v1",
113 CollationReordering<'static>,
114 fallback_config = SCRIPT_FALLBACK,
115 #[cfg(feature = "datagen")]
116 attributes_domain = "collator",
117);
118icu_provider::data_marker!(
119 CollationMetadataV1,
121 "collation/metadata/v1",
122 CollationMetadata,
123 fallback_config = SCRIPT_FALLBACK,
124 #[cfg(feature = "datagen")]
125 attributes_domain = "collator",
126);
127icu_provider::data_marker!(
128 CollationSpecialPrimariesV1,
130 "collation/special/primaries/v1",
131 CollationSpecialPrimaries<'static>,
132 is_singleton = true,
133);
134
/// The `INFO` of every data marker declared in this file.
///
/// Only available when the `datagen` feature is enabled.
#[cfg(feature = "datagen")]
pub const MARKERS: &[DataMarkerInfo] = &[
    CollationRootV1::INFO,
    CollationTailoringV1::INFO,
    CollationDiacriticsV1::INFO,
    CollationJamoV1::INFO,
    CollationMetadataV1::INFO,
    CollationReorderingV1::INFO,
    CollationSpecialPrimariesV1::INFO,
];
146
147const SINGLE_U32: &ZeroSlice<u32> =
148 zeroslice!(u32; <u32 as AsULE>::ULE::from_unsigned; [FFFD_CE32_VALUE]);
149const SINGLE_U64: &ZeroSlice<u64> =
150 zeroslice!(u64; <u64 as AsULE>::ULE::from_unsigned; [FFFD_CE_VALUE]);
151
/// Computes the primary weight for `c` from a packed offset data element.
///
/// `data_ce` is read as follows:
///
/// * upper 32 bits: a base primary with the lead byte in bits 31..24, the
///   second byte in bits 23..16 and the third byte in bits 15..8;
/// * lower 32 bits: a base code point in bits 31..8, a "compressible" flag in
///   bit 7 and a per-code-point step in bits 6..0.
///
/// The distance of `c` from the base code point, multiplied by the step, is
/// added to the base primary byte by byte, carrying from the third byte up to
/// the lead byte.
fn data_ce_to_primary(data_ce: u64, c: char) -> u32 {
    let base_primary = (data_ce >> 32) as u32;
    let packed = data_ce as u32 as i32;
    let base_code_point = packed >> 8;
    let step = packed & 0x7F;
    let is_compressible = (packed & 0x80) != 0;

    // Total increment to apply to the base primary.
    let mut carry = ((u32::from(c) as i32) - base_code_point) * step;

    // Third byte: counts in base 254, with values starting at 2.
    carry += (((base_primary >> 8) & 0xFF) as i32) - 2;
    let third = (((carry % 254) + 2) as u32) << 8;
    carry /= 254;

    // Second byte: same scheme, but a compressible primary counts in base 251
    // with values starting at 4.
    let (floor, radix) = if is_compressible { (4, 251) } else { (2, 254) };
    carry += (((base_primary >> 16) & 0xFF) as i32) - floor;
    let second = (((carry % radix) + floor) as u32) << 16;
    carry /= radix;

    // Whatever carry is left is added to the lead byte without further
    // range handling.
    third | second | ((base_primary & 0xFF000000) + ((carry as u32) << 24))
}
175
176#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
184#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
185#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
186#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
187pub struct CollationData<'data> {
188 #[cfg_attr(feature = "serde", serde(borrow))]
191 pub trie: CodePointTrie<'data, u32>,
192 #[cfg_attr(feature = "serde", serde(borrow))]
195 pub ces: ZeroVec<'data, u64>,
196 #[cfg_attr(feature = "serde", serde(borrow))]
199 pub ce32s: ZeroVec<'data, u32>,
200 #[cfg_attr(feature = "serde", serde(borrow))]
202 pub contexts: ZeroVec<'data, u16>,
203}
204
205icu_provider::data_struct!(
206 CollationData<'_>,
207 #[cfg(feature = "datagen")]
208);
209
210impl<'data> CollationData<'data> {
211 pub(crate) fn ce32_for_char(&self, c: char) -> CollationElement32 {
212 CollationElement32::new(self.trie.get32(c as u32))
213 }
214 pub(crate) fn get_ce32(&'data self, index: usize) -> CollationElement32 {
215 CollationElement32::new(if let Some(u) = self.ce32s.get(index) {
216 u
217 } else {
218 debug_assert!(false);
220 FFFD_CE32_VALUE
221 })
222 }
223 pub(crate) fn get_ce32s(&'data self, index: usize, len: usize) -> &'data ZeroSlice<u32> {
224 if len > 0 {
225 if let Some(slice) = self.ce32s.get_subslice(index..index + len) {
226 return slice;
227 }
228 }
229 debug_assert!(false);
231 SINGLE_U32
232 }
233 pub(crate) fn get_ces(&'data self, index: usize, len: usize) -> &'data ZeroSlice<u64> {
234 if len > 0 {
235 if let Some(slice) = self.ces.get_subslice(index..index + len) {
236 return slice;
237 }
238 }
239 debug_assert!(false);
241 SINGLE_U64
242 }
243 fn get_default_and_trie_impl(
244 &'data self,
245 index: usize,
246 ) -> (CollationElement32, &'data ZeroSlice<u16>) {
247 if let Some(slice) = self.contexts.get_subslice(index..self.contexts.len()) {
248 #[allow(clippy::unwrap_used)]
249 if slice.len() >= 2 {
250 let first = slice.get(0).unwrap();
252 let second = slice.get(1).unwrap();
253 let trie = slice.get_subslice(2..slice.len()).unwrap();
254 return (
255 CollationElement32::new((u32::from(first) << 16) | u32::from(second)),
256 trie,
257 );
258 }
259 }
260 debug_assert!(false);
262 (FFFD_CE32, EMPTY_U16)
263 }
264 pub(crate) fn get_default_and_trie(
265 &'data self,
266 index: usize,
267 ) -> (CollationElement32, Char16TrieIterator<'data>) {
268 let (ce32, trie) = self.get_default_and_trie_impl(index);
269 (ce32, Char16TrieIterator::new(trie))
270 }
271 pub(crate) fn get_default(&'data self, index: usize) -> CollationElement32 {
272 let (ce32, _) = self.get_default_and_trie_impl(index);
273 ce32
274 }
275 pub(crate) fn ce_from_offset_ce32(
276 &self,
277 c: char,
278 ce32: CollationElement32,
279 ) -> CollationElement {
280 debug_assert!(ce32.tag() == Tag::Offset);
281 if let Some(data_ce) = self.ces.get(ce32.index()) {
282 CollationElement::new_from_primary(data_ce_to_primary(data_ce, c))
283 } else {
284 debug_assert!(false);
286 FFFD_CE
287 }
288 }
289}
290
291#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
299#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
300#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
301#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
302pub struct CollationDiacritics<'data> {
303 #[cfg_attr(feature = "serde", serde(borrow))]
308 pub secondaries: ZeroVec<'data, u16>,
309}
310
311icu_provider::data_struct!(
312 CollationDiacritics<'_>,
313 #[cfg(feature = "datagen")]
314);
315
316#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
324#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
325#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
326#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
327pub struct CollationJamo<'data> {
328 #[cfg_attr(feature = "serde", serde(borrow))]
331 pub ce32s: ZeroVec<'data, u32>,
332}
333
334icu_provider::data_struct!(
335 CollationJamo<'_>,
336 #[cfg(feature = "datagen")]
337);
338
339#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
347#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
348#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
349#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
350pub struct CollationReordering<'data> {
351 pub min_high_no_reorder: u32,
355 #[cfg_attr(feature = "serde", serde(borrow))]
363 pub reorder_table: ZeroVec<'data, u8>, #[cfg_attr(feature = "serde", serde(borrow))]
386 pub reorder_ranges: ZeroVec<'data, u32>,
387}
388
389icu_provider::data_struct!(
390 CollationReordering<'_>,
391 #[cfg(feature = "datagen")]
392);
393
394impl CollationReordering<'_> {
395 pub(crate) fn reorder(&self, primary: u32) -> u32 {
396 if let Some(b) = self.reorder_table.get((primary >> 24) as usize) {
397 if b != 0 || primary <= NO_CE_PRIMARY {
398 (u32::from(b) << 24) | (primary & 0x00FFFFFF)
399 } else {
400 self.reorder_ex(primary)
401 }
402 } else {
403 debug_assert!(false);
405 primary
406 }
407 }
408
409 fn reorder_ex(&self, primary: u32) -> u32 {
410 if primary >= self.min_high_no_reorder {
411 return primary;
412 }
413 let q = primary | 0xFFFF;
414 for &range in self.reorder_ranges.as_ule_slice().iter() {
415 let r = u32::from_unaligned(range);
416 if q < r {
417 return primary.wrapping_add(r << 24);
418 }
419 }
420 debug_assert!(false);
422 primary
423 }
424}
425
426#[derive(Debug, PartialEq, Clone, Copy, yoke::Yokeable, zerofrom::ZeroFrom)]
436#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
437#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
438#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
439pub struct CollationMetadata {
440 pub bits: u32,
450}
451
452icu_provider::data_struct!(
453 CollationMetadata,
454 #[cfg(feature = "datagen")]
455);
456
457impl CollationMetadata {
458 const MAX_VARIABLE_MASK: u32 = 0b11;
459 const TAILORED_MASK: u32 = 1 << 3;
460 const TAILORED_DIACRITICS_MASK: u32 = 1 << 4;
461 const REORDERING_MASK: u32 = 1 << 5;
462 const LITHUANIAN_DOT_ABOVE_MASK: u32 = 1 << 6;
463 const BACWARD_SECOND_LEVEL_MASK: u32 = 1 << 7;
464 const ALTERNATE_SHIFTED_MASK: u32 = 1 << 8;
465 const CASE_FIRST_MASK: u32 = 1 << 9;
466 const UPPER_FIRST_MASK: u32 = 1 << 10;
467
468 #[inline(always)]
469 pub(crate) fn max_variable(self) -> MaxVariable {
470 unsafe { core::mem::transmute((self.bits & CollationMetadata::MAX_VARIABLE_MASK) as u8) }
474 }
475
476 #[inline(always)]
477 pub(crate) fn tailored(self) -> bool {
478 self.bits & CollationMetadata::TAILORED_MASK != 0
479 }
480
481 #[inline(always)]
483 pub(crate) fn tailored_diacritics(self) -> bool {
484 self.bits & CollationMetadata::TAILORED_DIACRITICS_MASK != 0
485 }
486
487 #[inline(always)]
489 pub(crate) fn lithuanian_dot_above(self) -> bool {
490 self.bits & CollationMetadata::LITHUANIAN_DOT_ABOVE_MASK != 0
491 }
492
493 #[inline(always)]
495 pub(crate) fn backward_second_level(self) -> bool {
496 self.bits & CollationMetadata::BACWARD_SECOND_LEVEL_MASK != 0
497 }
498
499 #[inline(always)]
500 pub(crate) fn reordering(self) -> bool {
501 self.bits & CollationMetadata::REORDERING_MASK != 0
502 }
503
504 #[inline(always)]
506 pub(crate) fn alternate_shifted(self) -> bool {
507 self.bits & CollationMetadata::ALTERNATE_SHIFTED_MASK != 0
508 }
509
510 #[inline(always)]
511 pub(crate) fn case_first(self) -> CollationCaseFirst {
512 if self.bits & CollationMetadata::CASE_FIRST_MASK != 0 {
513 if self.bits & CollationMetadata::UPPER_FIRST_MASK != 0 {
514 CollationCaseFirst::Upper
515 } else {
516 CollationCaseFirst::Lower
517 }
518 } else {
519 CollationCaseFirst::False
520 }
521 }
522}
523
524#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
532#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
533#[cfg_attr(feature = "datagen", databake(path = icu_collator::provider))]
534#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
535pub struct CollationSpecialPrimaries<'data> {
536 #[cfg_attr(feature = "serde", serde(borrow))]
541 pub last_primaries: ZeroVec<'data, u16>,
542 pub numeric_primary: u8,
544}
545
546icu_provider::data_struct!(
547 CollationSpecialPrimaries<'_>,
548 #[cfg(feature = "datagen")]
549);
550
551impl CollationSpecialPrimaries<'_> {
552 #[allow(clippy::unwrap_used)]
553 pub(crate) fn last_primary_for_group(&self, max_variable: MaxVariable) -> u32 {
554 (u32::from(self.last_primaries.get(max_variable as usize).unwrap()) << 16) - 1
559 }
560}