icu_collator/options.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5// The bit layout of `CollatorOptions` is adapted from ICU4C and, therefore,
6// is subject to the ICU license as described in LICENSE.
7
8//! This module contains the types that are part of the API for setting
9//! the options for the collator.
10
11use crate::{
12 elements::{CASE_MASK, TERTIARY_MASK},
13 preferences::CollationCaseFirst,
14 preferences::CollationNumericOrdering,
15 CollatorPreferences,
16};
17
/// The collation strength that indicates how many levels to compare.
///
/// If an earlier level isn't equal, the earlier level is decisive.
/// If the result is equal on a level, but the strength is higher,
/// the comparison proceeds to the next level.
///
/// Note: The bit layout of the collator's internal option bit field
/// requires `Strength` to fit in 3 bits.
#[derive(Eq, PartialEq, Debug, Copy, Clone, PartialOrd, Ord)]
#[repr(u8)]
#[non_exhaustive]
pub enum Strength {
    /// Compare only on the level of base letters. This level
    /// corresponds to the ECMA-402 sensitivity "base" with
    /// [`CaseLevel::Off`] (the default for [`CaseLevel`]) and
    /// to ECMA-402 sensitivity "case" with [`CaseLevel::On`].
    ///
    /// ```
    /// use icu::collator::{options::*, *};
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Primary);
    /// let collator = Collator::try_new(Default::default(), options).unwrap();
    /// assert_eq!(collator.compare("E", "é"), core::cmp::Ordering::Equal);
    /// ```
    Primary = 0,

    /// Compare also on the secondary level, which corresponds
    /// to diacritics in scripts that use them. This level corresponds
    /// to the ECMA-402 sensitivity "accent".
    ///
    /// ```
    /// use icu::collator::{options::*, *};
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Secondary);
    /// let collator = Collator::try_new(Default::default(), options).unwrap();
    /// assert_eq!(collator.compare("E", "e"), core::cmp::Ordering::Equal);
    /// assert_eq!(collator.compare("e", "é"), core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("あ", "ア"), core::cmp::Ordering::Equal);
    /// assert_eq!(collator.compare("ァ", "ア"), core::cmp::Ordering::Equal);
    /// // U+FF71 is the half-width form of katakana A.
    /// assert_eq!(collator.compare("ア", "\u{FF71}"), core::cmp::Ordering::Equal);
    /// ```
    Secondary = 1,

    /// Compare also on the tertiary level. By default, if the separate
    /// case level is disabled, this corresponds to case for bicameral
    /// scripts. This level distinguishes Hiragana and Katakana. This
    /// also captures other minor differences, such as half-width vs.
    /// full-width when the Japanese tailoring isn't in use.
    ///
    /// This is the default comparison level and appropriate for
    /// most scripts. This level corresponds to the ECMA-402
    /// sensitivity "variant".
    ///
    /// ```
    /// use icu::collator::{*, options::*};
    /// use icu::locale::locale;
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Tertiary);
    /// let collator = Collator::try_new(Default::default(), options).unwrap();
    /// assert_eq!(collator.compare("E", "e"), core::cmp::Ordering::Greater);
    /// assert_eq!(collator.compare("e", "é"), core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("あ", "ア"), core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("ァ", "ア"), core::cmp::Ordering::Less);
    /// // U+FF71 is the half-width form of katakana A.
    /// assert_eq!(collator.compare("ア", "\u{FF71}"), core::cmp::Ordering::Less);
    /// // U+FF45 is the full-width form of "e".
    /// assert_eq!(collator.compare("e", "\u{FF45}"), core::cmp::Ordering::Less);
    ///
    /// let ja_collator =
    ///     Collator::try_new(locale!("ja").into(), options).unwrap();
    /// assert_eq!(ja_collator.compare("E", "e"), core::cmp::Ordering::Greater);
    /// assert_eq!(ja_collator.compare("e", "é"), core::cmp::Ordering::Less);
    /// // Unlike root!
    /// assert_eq!(ja_collator.compare("あ", "ア"), core::cmp::Ordering::Equal);
    /// assert_eq!(ja_collator.compare("ァ", "ア"), core::cmp::Ordering::Less);
    /// // Unlike root!
    /// assert_eq!(ja_collator.compare("ア", "\u{FF71}"), core::cmp::Ordering::Equal);
    /// // Unlike root!
    /// assert_eq!(ja_collator.compare("e", "\u{FF45}"), core::cmp::Ordering::Equal);
    /// ```
    Tertiary = 2,

    /// Compare also on the quaternary level. For Japanese, Hiragana
    /// and Katakana are distinguished at the quaternary level. Also,
    /// if `AlternateHandling::Shifted` is used, the collation
    /// elements whose level gets shifted are shifted to this
    /// level.
    ///
    /// ```
    /// use icu::collator::{*, options::*};
    /// use icu::locale::locale;
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Quaternary);
    ///
    /// let ja_collator =
    ///     Collator::try_new(locale!("ja").into(), options).unwrap();
    /// assert_eq!(ja_collator.compare("あ", "ア"), core::cmp::Ordering::Less);
    /// // U+FF71 is the half-width form of katakana A.
    /// assert_eq!(ja_collator.compare("ア", "\u{FF71}"), core::cmp::Ordering::Equal);
    /// // U+FF45 is the full-width form of "e".
    /// assert_eq!(ja_collator.compare("e", "\u{FF45}"), core::cmp::Ordering::Equal);
    ///
    /// // Even this level doesn't distinguish everything,
    /// // e.g. Hebrew cantillation marks are still ignored.
    /// let collator = Collator::try_new(Default::default(), options).unwrap();
    /// assert_eq!(collator.compare("דחי", "דחי֭"), core::cmp::Ordering::Equal);
    /// ```
    // TODO: Thai example.
    Quaternary = 3,

    /// Compare the NFD form by code point order as the quinary
    /// level. This level makes the comparison slower and should
    /// not be used in the general case. However, it can be used
    /// to distinguish full-width and half-width forms when the
    /// Japanese tailoring is in use and to distinguish e.g.
    /// Hebrew cantillation marks. Use this level if you need
    /// JIS X 4061-1996 compliance for Japanese on the level of
    /// distinguishing full-width and half-width forms.
    ///
    /// ```
    /// use icu::collator::{*, options::*};
    /// use icu::locale::locale;
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Identical);
    ///
    /// let ja_collator =
    ///     Collator::try_new(locale!("ja").into(), options).unwrap();
    /// // U+FF71 is the half-width form of katakana A.
    /// assert_eq!(ja_collator.compare("ア", "\u{FF71}"), core::cmp::Ordering::Less);
    /// // U+FF45 is the full-width form of "e".
    /// assert_eq!(ja_collator.compare("e", "\u{FF45}"), core::cmp::Ordering::Less);
    ///
    /// let collator = Collator::try_new(Default::default(), options).unwrap();
    /// assert_eq!(collator.compare("דחי", "דחי֭"), core::cmp::Ordering::Less);
    /// ```
    // The gap between 3 and 7 is deliberate: the value occupies a 3-bit
    // field and 7 is its maximum.
    Identical = 7,
}
176
/// What to do about characters whose comparison level can be
/// varied dynamically.
///
/// This is the option behind the BCP47 key `ka`. The discriminants are
/// the values of the two-bit alternate-handling field in the collator's
/// internal option bit field; the two remaining values of that field
/// are reserved for modes that are not implemented.
#[derive(Eq, PartialEq, Debug, Copy, Clone, PartialOrd, Ord)]
#[repr(u8)]
#[non_exhaustive]
pub enum AlternateHandling {
    /// Keep the characters whose level can be varied on the
    /// primary level.
    NonIgnorable = 0,
    /// Shift the characters at or below `MaxVariable` to the
    /// quaternary level.
    Shifted = 1,
    // Possible future values: ShiftTrimmed, Blanked
}
191
/// What characters get shifted to the quaternary level
/// with `AlternateHandling::Shifted`.
///
/// The variants are cumulative: each one shifts everything the
/// previous one does plus one more class of characters. The
/// discriminant is the two-bit value that the collator's internal
/// option bit field stores for this option, so the discriminants
/// must stay within 0..=3.
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
#[repr(u8)] // This repr is necessary for transmute safety
#[non_exhaustive]
pub enum MaxVariable {
    /// Characters classified as spaces are shifted.
    Space = 0,
    /// Characters classified as spaces or punctuation
    /// are shifted.
    Punctuation = 1,
    /// Characters classified as spaces, punctuation,
    /// or symbols are shifted.
    Symbol = 2,
    /// Characters classified as spaces, punctuation,
    /// symbols, or currency symbols are shifted.
    Currency = 3,
}
210
/// Whether to distinguish case in sorting, even for sorting levels higher
/// than tertiary, without having to use tertiary level just to enable case level differences.
///
/// This is the option behind the BCP47 key `kc`. The default is
/// [`CaseLevel::Off`].
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
#[repr(u8)]
#[non_exhaustive]
pub enum CaseLevel {
    /// Leave off the case level option. Case differences will be handled by default
    /// in tertiary strength.
    Off = 0,
    /// Turn on the case level option, thereby making a separate level for case
    /// differences, positioned between secondary and tertiary.
    ///
    /// When used together with [`Strength::Primary`], this corresponds to the
    /// ECMA-402 sensitivity "case".
    On = 1,
}
227
/// Options settable by the user of the API.
///
/// With the exception of reordering (BCP47 `kr`), options that can be implied by locale are
/// set via [`CollatorPreferences`].
///
/// See the [spec](https://www.unicode.org/reports/tr35/tr35-collation.html#Setting_Options).
///
/// Every field is an `Option` so that `None` can be used to go back to default.
///
/// # Options
///
/// Examples for using the different options below can be found in the [crate-level docs](crate).
///
/// ## ECMA-402 Sensitivity
///
/// ECMA-402 `sensitivity` maps to a combination of [`Strength`] and [`CaseLevel`] as follows:
///
/// <dl>
/// <dt><code>sensitivity: "base"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Primary"><code>Strength::Primary</code></a></dd>
/// <dt><code>sensitivity: "accent"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Secondary"><code>Strength::Secondary</code></a></dd>
/// <dt><code>sensitivity: "case"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Primary"><code>Strength::Primary</code></a> and <a href="enum.CaseLevel.html#variant.On"><code>CaseLevel::On</code></a></dd>
/// <dt><code>sensitivity: "variant"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Tertiary"><code>Strength::Tertiary</code></a></dd>
/// </dl>
///
/// ## Strength
///
/// This is the BCP47 key `ks`. The default is [`Strength::Tertiary`].
///
/// ## Alternate Handling
///
/// This is the BCP47 key `ka`. Note that `AlternateHandling::ShiftTrimmed` and
/// `AlternateHandling::Blanked` are unimplemented. The default is
/// [`AlternateHandling::NonIgnorable`], except
/// for Thai, whose default is [`AlternateHandling::Shifted`].
///
/// ## Max Variable
///
/// The highest class of characters that [`AlternateHandling::Shifted`]
/// applies to. See [`MaxVariable`].
///
/// ## Case Level
///
/// See the [spec](https://www.unicode.org/reports/tr35/tr35-collation.html#Case_Parameters).
/// This is the BCP47 key `kc`. The default is [`CaseLevel::Off`].
///
/// # Unsupported BCP47 options
///
/// Reordering (BCP47 `kr`) currently cannot be set via the API and is implied
/// by the locale of the collation. `kr` is prohibited by ECMA-402.
///
/// Backward second level (BCP47 `kb`) cannot be set via the API and is implied
/// by the locale of the collation (in practice only `fr-CA` turns it on and it's
/// off otherwise). `kb` is prohibited by ECMA-402.
///
/// Normalization is always enabled and cannot be turned off. Therefore, there
/// is no option corresponding to BCP47 `kk`. `kk` is prohibited by ECMA-402.
///
/// Hiragana quaternary handling is part of the strength for the Japanese
/// tailoring. The BCP47 key `kh` is unsupported. `kh` is deprecated and
/// prohibited by ECMA-402.
///
/// Variable top (BCP47 `vt`) is unsupported (use Max Variable instead). `vt`
/// is deprecated and prohibited by ECMA-402.
///
/// ## ECMA-402 Usage
///
/// ECMA-402 `usage: "search"` is represented as `-u-co-search` as part of the
/// locale in ICU4X. However, neither ECMA-402 nor ICU4X provides prefix matching
/// or substring matching API surface. This makes the utility of search collations
/// very narrow: With `-u-co-search`, [`Strength::Primary`], and observing whether
/// comparison output is [`core::cmp::Ordering::Equal`] (making no distinction between
/// [`core::cmp::Ordering::Less`] and [`core::cmp::Ordering::Greater`]), it is
/// possible to check if a set of human-readable strings contains a full-string
/// fuzzy match of a user-entered string, where "fuzzy" means case-insensitive and
/// accent-insensitive for scripts that have such concepts and something roughly
/// similar for other scripts.
///
/// Due to the very limited utility, ICU4X data does not include search collations
/// by default.
#[non_exhaustive]
#[derive(Debug, Copy, Clone, Default)]
pub struct CollatorOptions {
    /// User-specified strength collation option; `None` leaves it at the default.
    pub strength: Option<Strength>,
    /// User-specified alternate handling collation option; `None` leaves it at the default.
    pub alternate_handling: Option<AlternateHandling>,
    /// User-specified max variable collation option; `None` leaves it at the default.
    pub max_variable: Option<MaxVariable>,
    /// User-specified case level collation option; `None` leaves it at the default.
    pub case_level: Option<CaseLevel>,
}
318
319impl CollatorOptions {
320 /// Create a new `CollatorOptions` with the defaults.
321 pub const fn default() -> Self {
322 Self {
323 strength: None,
324 alternate_handling: None,
325 max_variable: None,
326 case_level: None,
327 }
328 }
329}
330
331// Make it possible to easily copy the resolved options of
332// one collator into another collator.
333impl From<ResolvedCollatorOptions> for CollatorOptions {
334 /// Convenience conversion for copying the options from an
335 /// existing collator into a new one (overriding any locale-provided
336 /// defaults of the new one!).
337 fn from(options: ResolvedCollatorOptions) -> CollatorOptions {
338 Self {
339 strength: Some(options.strength),
340 alternate_handling: Some(options.alternate_handling),
341 max_variable: Some(options.max_variable),
342 case_level: Some(options.case_level),
343 }
344 }
345}
346
347// Make it possible to easily copy the resolved preferences of
348// one collator into another collator.
349impl From<ResolvedCollatorOptions> for CollatorPreferences {
350 /// Convenience conversion for copying the preferences from an
351 /// existing collator into a new one.
352 ///
353 /// Note that some preferences may not be fully preserved when recovering them
354 /// from an already initialized collator e.g [`LocalePreferences`] and [`CollationType`], because
355 /// those are only relevant when loading the collation data.
356 ///
357 /// [`LocalePreferences`]: icu_locale_core::preferences::LocalePreferences
358 /// [`CollationType`]: crate::preferences::CollationType
359 fn from(options: ResolvedCollatorOptions) -> CollatorPreferences {
360 CollatorPreferences {
361 case_first: Some(options.case_first),
362 numeric_ordering: Some(options.numeric),
363 ..Default::default()
364 }
365 }
366}
367
/// The resolved (actually used) options used by the collator.
///
/// Unlike `CollatorOptions`, every field holds a concrete value rather
/// than an `Option`. The backward-second-level setting is tracked
/// internally but is not exposed here.
///
/// See the documentation of `CollatorOptions`.
#[non_exhaustive]
#[derive(Debug, Copy, Clone)]
pub struct ResolvedCollatorOptions {
    /// Resolved strength collation option.
    pub strength: Strength,
    /// Resolved alternate handling collation option.
    pub alternate_handling: AlternateHandling,
    /// Resolved case first collation option.
    pub case_first: CollationCaseFirst,
    /// Resolved max variable collation option.
    pub max_variable: MaxVariable,
    /// Resolved case level collation option.
    pub case_level: CaseLevel,
    /// Resolved numeric collation option.
    pub numeric: CollationNumericOrdering,
}
387
388impl From<CollatorOptionsBitField> for ResolvedCollatorOptions {
389 fn from(options: CollatorOptionsBitField) -> ResolvedCollatorOptions {
390 Self {
391 strength: options.strength(),
392 alternate_handling: options.alternate_handling(),
393 case_first: options.case_first(),
394 max_variable: options.max_variable(),
395 case_level: if options.case_level() {
396 CaseLevel::On
397 } else {
398 CaseLevel::Off
399 },
400 numeric: if options.numeric() {
401 CollationNumericOrdering::True
402 } else {
403 CollationNumericOrdering::False
404 },
405 // `options.backward_second_level()` not exposed.
406 }
407 }
408}
409
/// All collator options packed into a single `u32`.
///
/// The low bits hold the option values and the high bits record, per
/// option, whether the value was explicitly set. The exact bit
/// positions are the mask constants in the inherent `impl`.
#[derive(Copy, Clone, Debug)]
pub(crate) struct CollatorOptionsBitField(u32);
412
413impl Default for CollatorOptionsBitField {
414 fn default() -> Self {
415 Self::default()
416 }
417}
418
419impl CollatorOptionsBitField {
420 /// Bits 0..2 : Strength
421 const STRENGTH_MASK: u32 = 0b111;
422 /// Bits 3..4 : Alternate handling: 00 non-ignorable, 01 shifted,
423 /// 10 reserved for shift-trimmed, 11 reserved for blanked.
424 /// In other words, bit 4 is currently always 0.
425 const ALTERNATE_HANDLING_MASK: u32 = 1 << 3;
426 /// Bits 5..6 : 2-bit max variable value to be shifted by `MAX_VARIABLE_SHIFT`.
427 const MAX_VARIABLE_MASK: u32 = 0b01100000;
428 const MAX_VARIABLE_SHIFT: u32 = 5;
429 /// Bit 7 : Reserved for extending max variable.
430 /// Bit 8 : Sort uppercase first if case level or case first is on.
431 const UPPER_FIRST_MASK: u32 = 1 << 8;
432 /// Bit 9 : Keep the case bits in the tertiary weight (they trump
433 /// other tertiary values)
434 /// unless case level is on (when they are *moved* into the separate case level).
435 /// By default, the case bits are removed from the tertiary weight (ignored).
436 /// When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
437 /// the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs.
438 /// UCOL_UPPER_FIRST.
439 const CASE_FIRST_MASK: u32 = 1 << 9;
440 /// Bit 10 : Insert the case level between the secondary and tertiary levels.
441 const CASE_LEVEL_MASK: u32 = 1 << 10;
442 /// Bit 11 : Backward secondary level
443 const BACKWARD_SECOND_LEVEL_MASK: u32 = 1 << 11;
444 /// Bit 12 : Numeric
445 const NUMERIC_MASK: u32 = 1 << 12;
446
447 /// Whether strength is explicitly set.
448 const EXPLICIT_STRENGTH_MASK: u32 = 1 << 31;
449 /// Whether max variable is explicitly set.
450 const EXPLICIT_MAX_VARIABLE_MASK: u32 = 1 << 30;
451 /// Whether alternate handling is explicitly set.
452 const EXPLICIT_ALTERNATE_HANDLING_MASK: u32 = 1 << 29;
453 /// Whether case level is explicitly set.
454 const EXPLICIT_CASE_LEVEL_MASK: u32 = 1 << 28;
455 /// Whether case first is explicitly set.
456 const EXPLICIT_CASE_FIRST_MASK: u32 = 1 << 27;
457 /// Whether backward secondary is explicitly set.
458 const EXPLICIT_BACKWARD_SECOND_LEVEL_MASK: u32 = 1 << 26;
459 /// Whether numeric is explicitly set.
460 const EXPLICIT_NUMERIC_MASK: u32 = 1 << 25;
461
462 /// Create a new [`CollatorOptionsBitField`] with the defaults.
463 pub const fn default() -> Self {
464 Self(Strength::Tertiary as u32)
465 }
466
467 /// This is the BCP47 key `ks`.
468 pub fn strength(self) -> Strength {
469 let mut bits = self.0 & CollatorOptionsBitField::STRENGTH_MASK;
470 if !(bits <= 3 || bits == 7) {
471 debug_assert!(false, "Bad value for strength.");
472 // If the bits say higher than `Quaternary` but
473 // lower than `Identical`, clamp to `Quaternary`.
474 bits = 3;
475 }
476 // Safety: Strength is repr(u8) and has discriminants between 0 and 7. The
477 // above code ensures that, since the mask puts us `≤ 8`
478 unsafe { core::mem::transmute(bits as u8) }
479 }
480
481 /// This is the BCP47 key `ks`. See the enum for examples.
482 pub fn set_strength(&mut self, strength: Option<Strength>) {
483 self.0 &= !CollatorOptionsBitField::STRENGTH_MASK;
484 if let Some(strength) = strength {
485 self.0 |= CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK;
486 self.0 |= strength as u32;
487 } else {
488 self.0 &= !CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK;
489 }
490 }
491
492 /// The maximum character class that `AlternateHandling::Shifted`
493 /// applies to.
494 pub fn max_variable(self) -> MaxVariable {
495 // Safe, because we mask two bits and shift them to the low
496 // two bits and the enum has values for 0 to 3, inclusive.
497 unsafe {
498 core::mem::transmute(
499 ((self.0 & CollatorOptionsBitField::MAX_VARIABLE_MASK)
500 >> CollatorOptionsBitField::MAX_VARIABLE_SHIFT) as u8,
501 )
502 }
503 }
504
505 /// The maximum character class that `AlternateHandling::Shifted`
506 /// applies to. See the enum for examples.
507 pub fn set_max_variable(&mut self, max_variable: Option<MaxVariable>) {
508 self.0 &= !CollatorOptionsBitField::MAX_VARIABLE_MASK;
509 if let Some(max_variable) = max_variable {
510 self.0 |= CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK;
511 self.0 |= (max_variable as u32) << CollatorOptionsBitField::MAX_VARIABLE_SHIFT;
512 } else {
513 self.0 &= !CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK;
514 }
515 }
516
517 /// Whether certain characters are moved from the primary level to
518 /// the quaternary level.
519 pub fn alternate_handling(self) -> AlternateHandling {
520 if (self.0 & CollatorOptionsBitField::ALTERNATE_HANDLING_MASK) != 0 {
521 AlternateHandling::Shifted
522 } else {
523 AlternateHandling::NonIgnorable
524 }
525 }
526
527 /// Whether certain characters are moved from the primary level to
528 /// the quaternary level. See the enum for examples.
529 pub fn set_alternate_handling(&mut self, alternate_handling: Option<AlternateHandling>) {
530 self.0 &= !CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
531 if let Some(alternate_handling) = alternate_handling {
532 self.0 |= CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK;
533 if alternate_handling == AlternateHandling::Shifted {
534 self.0 |= CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
535 }
536 } else {
537 self.0 &= !CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK;
538 }
539 }
540
541 /// Whether there's a dedicated case level.
542 pub fn case_level(self) -> bool {
543 (self.0 & CollatorOptionsBitField::CASE_LEVEL_MASK) != 0
544 }
545
546 /// Whether there's a dedicated case level. If `true`, detaches
547 /// the case aspect of the tertiary level and inserts it between
548 /// the secondary and tertiary levels. Can be combined with the
549 /// primary-only strength. Setting this to `true` with
550 /// `Strength::Primary` corresponds to the ECMA-402 sensitivity
551 /// "case".
552 ///
553 /// See [the ICU guide](https://unicode-org.github.io/icu/userguide/collation/concepts.html#caselevel).
554 pub fn set_case_level(&mut self, case_level: Option<bool>) {
555 self.0 &= !CollatorOptionsBitField::CASE_LEVEL_MASK;
556 if let Some(case_level) = case_level {
557 self.0 |= CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK;
558 if case_level {
559 self.0 |= CollatorOptionsBitField::CASE_LEVEL_MASK;
560 }
561 } else {
562 self.0 &= !CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK;
563 }
564 }
565
566 pub fn set_case_level_from_enum(&mut self, case_level: Option<CaseLevel>) {
567 match case_level {
568 Some(CaseLevel::On) => {
569 self.set_case_level(Some(true));
570 }
571 Some(CaseLevel::Off) => {
572 self.set_case_level(Some(false));
573 }
574 _ => self.set_case_level(None),
575 }
576 }
577
578 fn case_first(self) -> CollationCaseFirst {
579 if (self.0 & CollatorOptionsBitField::CASE_FIRST_MASK) != 0 {
580 if (self.0 & CollatorOptionsBitField::UPPER_FIRST_MASK) != 0 {
581 CollationCaseFirst::Upper
582 } else {
583 CollationCaseFirst::Lower
584 }
585 } else {
586 CollationCaseFirst::False
587 }
588 }
589
590 /// Whether case is the most significant part of the tertiary
591 /// level.
592 ///
593 /// See [the ICU guide](https://unicode-org.github.io/icu/userguide/collation/concepts.html#caselevel).
594 pub fn set_case_first(&mut self, case_first: Option<CollationCaseFirst>) {
595 self.0 &=
596 !(CollatorOptionsBitField::CASE_FIRST_MASK | CollatorOptionsBitField::UPPER_FIRST_MASK);
597 if let Some(case_first) = case_first {
598 self.0 |= CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK;
599 match case_first {
600 CollationCaseFirst::False => {}
601 CollationCaseFirst::Lower => {
602 self.0 |= CollatorOptionsBitField::CASE_FIRST_MASK;
603 }
604 CollationCaseFirst::Upper => {
605 self.0 |= CollatorOptionsBitField::CASE_FIRST_MASK;
606 self.0 |= CollatorOptionsBitField::UPPER_FIRST_MASK;
607 }
608 _ => {
609 debug_assert!(false, "unknown variant `{case_first:?}`");
610 }
611 }
612 } else {
613 self.0 &= !CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK;
614 }
615 }
616
617 /// Whether second level compares the last accent difference
618 /// instead of the first accent difference.
619 pub fn backward_second_level(self) -> bool {
620 (self.0 & CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK) != 0
621 }
622
623 /// Whether second level compares the last accent difference
624 /// instead of the first accent difference.
625 pub fn set_backward_second_level(&mut self, backward_second_level: Option<bool>) {
626 self.0 &= !CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
627 if let Some(backward_second_level) = backward_second_level {
628 self.0 |= CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK;
629 if backward_second_level {
630 self.0 |= CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
631 }
632 } else {
633 self.0 &= !CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK;
634 }
635 }
636
637 /// Whether sequences of decimal digits are compared according
638 /// to their numeric value.
639 pub fn numeric(self) -> bool {
640 (self.0 & CollatorOptionsBitField::NUMERIC_MASK) != 0
641 }
642
643 /// Whether sequences of decimal digits are compared according
644 /// to their numeric value.
645 pub fn set_numeric(&mut self, numeric: Option<bool>) {
646 self.0 &= !CollatorOptionsBitField::NUMERIC_MASK;
647 if let Some(numeric) = numeric {
648 self.0 |= CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK;
649 if numeric {
650 self.0 |= CollatorOptionsBitField::NUMERIC_MASK;
651 }
652 } else {
653 self.0 &= !CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK;
654 }
655 }
656
657 pub fn set_numeric_from_enum(&mut self, numeric: Option<CollationNumericOrdering>) {
658 match numeric {
659 Some(CollationNumericOrdering::True) => {
660 self.set_numeric(Some(true));
661 }
662 Some(CollationNumericOrdering::False) => {
663 self.set_numeric(Some(false));
664 }
665 Some(_) => {
666 debug_assert!(false, "unknown variant `{numeric:?}`");
667 self.set_numeric(Some(false));
668 }
669 None => self.set_numeric(None),
670 }
671 }
672
673 /// If strength is <= secondary, returns `None`.
674 /// Otherwise, returns the appropriate mask.
675 pub(crate) fn tertiary_mask(self) -> Option<u16> {
676 if self.strength() <= Strength::Secondary {
677 None
678 } else if (self.0
679 & (CollatorOptionsBitField::CASE_FIRST_MASK | CollatorOptionsBitField::CASE_LEVEL_MASK))
680 == CollatorOptionsBitField::CASE_FIRST_MASK
681 {
682 Some(CASE_MASK | TERTIARY_MASK)
683 } else {
684 Some(TERTIARY_MASK)
685 }
686 }
687
688 /// Internal upper first getter
689 pub(crate) fn upper_first(self) -> bool {
690 (self.0 & CollatorOptionsBitField::UPPER_FIRST_MASK) != 0
691 }
692
693 /// For options left as defaults in this `CollatorOptions`,
694 /// set the value from `other`. Values taken from `other`
695 /// are marked as explicitly set if they were explicitly
696 /// set in `other`.
697 pub fn set_defaults(&mut self, other: CollatorOptionsBitField) {
698 if self.0 & CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK == 0 {
699 self.0 &= !CollatorOptionsBitField::STRENGTH_MASK;
700 self.0 |= other.0 & CollatorOptionsBitField::STRENGTH_MASK;
701 self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK;
702 }
703 if self.0 & CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK == 0 {
704 self.0 &= !CollatorOptionsBitField::MAX_VARIABLE_MASK;
705 self.0 |= other.0 & CollatorOptionsBitField::MAX_VARIABLE_MASK;
706 self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK;
707 }
708 if self.0 & CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK == 0 {
709 self.0 &= !CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
710 self.0 |= other.0 & CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
711 self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK;
712 }
713 if self.0 & CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK == 0 {
714 self.0 &= !CollatorOptionsBitField::CASE_LEVEL_MASK;
715 self.0 |= other.0 & CollatorOptionsBitField::CASE_LEVEL_MASK;
716 self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK;
717 }
718 if self.0 & CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK == 0 {
719 self.0 &= !(CollatorOptionsBitField::CASE_FIRST_MASK
720 | CollatorOptionsBitField::UPPER_FIRST_MASK);
721 self.0 |= other.0
722 & (CollatorOptionsBitField::CASE_FIRST_MASK
723 | CollatorOptionsBitField::UPPER_FIRST_MASK);
724 self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK;
725 }
726 if self.0 & CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK == 0 {
727 self.0 &= !CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
728 self.0 |= other.0 & CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
729 self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK;
730 }
731 if self.0 & CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK == 0 {
732 self.0 &= !CollatorOptionsBitField::NUMERIC_MASK;
733 self.0 |= other.0 & CollatorOptionsBitField::NUMERIC_MASK;
734 self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK;
735 }
736 }
737}
738
739impl From<CollatorOptions> for CollatorOptionsBitField {
740 fn from(options: CollatorOptions) -> CollatorOptionsBitField {
741 let mut result = Self::default();
742 result.set_strength(options.strength);
743 result.set_max_variable(options.max_variable);
744 result.set_alternate_handling(options.alternate_handling);
745 result.set_case_level_from_enum(options.case_level);
746 result
747 }
748}