icu_collator/
options.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5// The bit layout of `CollatorOptions` is adapted from ICU4C and, therefore,
6// is subject to the ICU license as described in LICENSE.
7
8//! This module contains the types that are part of the API for setting
9//! the options for the collator.
10
11use crate::{
12    elements::{CASE_MASK, TERTIARY_MASK},
13    preferences::CollationCaseFirst,
14    preferences::CollationNumericOrdering,
15    CollatorPreferences,
16};
17
/// The collation strength that indicates how many levels to compare.
///
/// If an earlier level isn't equal, the earlier level is decisive.
/// If the result is equal on a level, but the strength is higher,
/// the comparison proceeds to the next level.
///
/// Note: The bit layout of `CollatorOptions` requires `Strength`
/// to fit in 3 bits.
#[derive(Eq, PartialEq, Debug, Copy, Clone, PartialOrd, Ord)]
#[repr(u8)]
#[non_exhaustive]
pub enum Strength {
    /// Compare only on the level of base letters. This level
    /// corresponds to the ECMA-402 sensitivity "base" with
    /// [`CaseLevel::Off`] (the default for [`CaseLevel`]) and
    /// to ECMA-402 sensitivity "case" with [`CaseLevel::On`].
    ///
    /// ```
    /// use icu::collator::{options::*, *};
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Primary);
    /// let collator = Collator::try_new(Default::default(), options).unwrap();
    /// assert_eq!(collator.compare("E", "é"), core::cmp::Ordering::Equal);
    /// ```
    Primary = 0,

    /// Compare also on the secondary level, which corresponds
    /// to diacritics in scripts that use them. This level corresponds
    /// to the ECMA-402 sensitivity "accent".
    ///
    /// ```
    /// use icu::collator::{options::*, *};
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Secondary);
    /// let collator = Collator::try_new(Default::default(), options).unwrap();
    /// assert_eq!(collator.compare("E", "e"), core::cmp::Ordering::Equal);
    /// assert_eq!(collator.compare("e", "é"), core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("あ", "ア"), core::cmp::Ordering::Equal);
    /// assert_eq!(collator.compare("ァ", "ア"), core::cmp::Ordering::Equal);
    /// assert_eq!(collator.compare("ア", "ｱ"), core::cmp::Ordering::Equal);
    /// ```
    Secondary = 1,

    /// Compare also on the tertiary level. By default, if the separate
    /// case level is disabled, this corresponds to case for bicameral
    /// scripts. This level distinguishes Hiragana and Katakana. This
    /// also captures other minor differences, such as half-width vs.
    /// full-width when the Japanese tailoring isn't in use.
    ///
    /// This is the default comparison level and appropriate for
    /// most scripts. This level corresponds to the ECMA-402
    /// sensitivity "variant".
    ///
    /// ```
    /// use icu::collator::{*, options::*};
    /// use icu::locale::locale;
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Tertiary);
    /// let collator =
    ///   Collator::try_new(Default::default(),
    ///                     options).unwrap();
    /// assert_eq!(collator.compare("E", "e"),
    ///            core::cmp::Ordering::Greater);
    /// assert_eq!(collator.compare("e", "é"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("あ", "ア"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("ァ", "ア"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("ア", "ｱ"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(collator.compare("e", "ｅ"), // Full-width e
    ///            core::cmp::Ordering::Less);
    ///
    /// let ja_collator =
    ///   Collator::try_new(locale!("ja").into(), options).unwrap();
    /// assert_eq!(ja_collator.compare("E", "e"),
    ///            core::cmp::Ordering::Greater);
    /// assert_eq!(ja_collator.compare("e", "é"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(ja_collator.compare("あ", "ア"),
    ///            core::cmp::Ordering::Equal); // Unlike root!
    /// assert_eq!(ja_collator.compare("ァ", "ア"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(ja_collator.compare("ア", "ｱ"),
    ///            core::cmp::Ordering::Equal); // Unlike root!
    /// assert_eq!(ja_collator.compare("e", "ｅ"), // Full-width e
    ///            core::cmp::Ordering::Equal); // Unlike root!
    /// ```
    Tertiary = 2,

    /// Compare also on the quaternary level. For Japanese, Hiragana
    /// and Katakana are distinguished at the quaternary level. Also,
    /// if `AlternateHandling::Shifted` is used, the collation
    /// elements whose level gets shifted are shifted to this
    /// level.
    ///
    /// ```
    /// use icu::collator::{*, options::*};
    /// use icu::locale::locale;
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Quaternary);
    ///
    /// let ja_collator =
    ///   Collator::try_new(locale!("ja").into(), options).unwrap();
    /// assert_eq!(ja_collator.compare("あ", "ア"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(ja_collator.compare("ア", "ｱ"),
    ///            core::cmp::Ordering::Equal);
    /// assert_eq!(ja_collator.compare("e", "ｅ"), // Full-width e
    ///            core::cmp::Ordering::Equal);
    ///
    /// // Even this level doesn't distinguish everything,
    /// // e.g. Hebrew cantillation marks are still ignored.
    /// let collator =
    ///   Collator::try_new(Default::default(),
    ///                     options).unwrap();
    /// assert_eq!(collator.compare("דחי", "דחי֭"),
    ///            core::cmp::Ordering::Equal);
    /// ```
    // TODO: Thai example.
    Quaternary = 3,

    /// Compare the NFD form by code point order as the quinary
    /// level. This level makes the comparison slower and should
    /// not be used in the general case. However, it can be used
    /// to distinguish full-width and half-width forms when the
    /// Japanese tailoring is in use and to distinguish e.g.
    /// Hebrew cantillation marks. Use this level if you need
    /// JIS X 4061-1996 compliance for Japanese on the level of
    /// distinguishing full-width and half-width forms.
    ///
    /// ```
    /// use icu::collator::{*, options::*};
    /// use icu::locale::locale;
    ///
    /// let mut options = CollatorOptions::default();
    /// options.strength = Some(Strength::Identical);
    ///
    /// let ja_collator =
    ///   Collator::try_new(locale!("ja").into(), options).unwrap();
    /// assert_eq!(ja_collator.compare("ア", "ｱ"),
    ///            core::cmp::Ordering::Less);
    /// assert_eq!(ja_collator.compare("e", "ｅ"), // Full-width e
    ///            core::cmp::Ordering::Less);
    ///
    /// let collator =
    ///   Collator::try_new(Default::default(),
    ///                     options).unwrap();
    /// assert_eq!(collator.compare("דחי", "דחי֭"),
    ///            core::cmp::Ordering::Less);
    /// ```
    // The discriminants 4..=6 are intentionally unused; `Identical` occupies
    // the highest value that fits in the 3 bits reserved for the strength.
    Identical = 7,
}
176
/// What to do about characters whose comparison level can be
/// varied dynamically.
///
/// Which characters are affected by [`AlternateHandling::Shifted`] is
/// controlled by [`MaxVariable`].
#[derive(Eq, PartialEq, Debug, Copy, Clone, PartialOrd, Ord)]
#[repr(u8)]
#[non_exhaustive]
pub enum AlternateHandling {
    /// Keep the characters whose level can be varied on the
    /// primary level.
    NonIgnorable = 0,
    /// Shift the characters at or below `MaxVariable` to the
    /// quaternary level.
    Shifted = 1,
    // Possible future values: ShiftTrimmed, Blanked
}
191
/// What characters get shifted to the quaternary level
/// with `AlternateHandling::Shifted`.
///
/// Each variant includes the character classes of the variants
/// before it, so the classes are cumulative in declaration order.
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
#[repr(u8)] // This repr is necessary for transmute safety
#[non_exhaustive]
pub enum MaxVariable {
    /// Characters classified as spaces are shifted.
    Space = 0,
    /// Characters classified as spaces or punctuation
    /// are shifted.
    Punctuation = 1,
    /// Characters classified as spaces, punctuation,
    /// or symbols are shifted.
    Symbol = 2,
    /// Characters classified as spaces, punctuation,
    /// symbols, or currency symbols are shifted.
    Currency = 3,
}
210
/// Whether to distinguish case in sorting, even for sorting levels higher
/// than tertiary, without having to use tertiary level just to enable case level differences.
///
/// The two variants map onto a single on/off flag in the collator's
/// internal options.
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
#[repr(u8)]
#[non_exhaustive]
pub enum CaseLevel {
    /// Leave off the case level option.  Case differences will be handled by default
    /// in tertiary strength.
    Off = 0,
    /// Turn on the case level option, thereby making a separate level for case
    /// differences, positioned between secondary and tertiary.
    ///
    /// When used together with [`Strength::Primary`], this corresponds to the
    /// ECMA-402 sensitivity "case".
    On = 1,
}
227
/// Options settable by the user of the API.
///
/// With the exception of reordering (BCP47 `kr`), options that can be implied by locale are
/// set via [`CollatorPreferences`].
///
/// See the [spec](https://www.unicode.org/reports/tr35/tr35-collation.html#Setting_Options).
///
/// The setters take an `Option` so that `None` can be used to go back to default.
///
/// # Options
///
/// Examples for using the different options below can be found in the [crate-level docs](crate).
///
/// ## ECMA-402 Sensitivity
///
/// ECMA-402 `sensitivity` maps to a combination of [`Strength`] and [`CaseLevel`] as follows:
///
/// <dl>
/// <dt><code>sensitivity: "base"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Primary"><code>Strength::Primary</code></a></dd>
/// <dt><code>sensitivity: "accent"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Secondary"><code>Strength::Secondary</code></a></dd>
/// <dt><code>sensitivity: "case"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Primary"><code>Strength::Primary</code></a> and <a href="enum.CaseLevel.html#variant.On"><code>CaseLevel::On</code></a></dd>
/// <dt><code>sensitivity: "variant"</code></dt>
/// <dd><a href="enum.Strength.html#variant.Tertiary"><code>Strength::Tertiary</code></a></dd>
/// </dl>
///
/// ## Strength
///
/// This is the BCP47 key `ks`. The default is [`Strength::Tertiary`].
///
/// ## Alternate Handling
///
/// This is the BCP47 key `ka`. Note that `AlternateHandling::ShiftTrimmed` and
/// `AlternateHandling::Blanked` are unimplemented. The default is
/// [`AlternateHandling::NonIgnorable`], except
/// for Thai, whose default is [`AlternateHandling::Shifted`].
///
/// ## Case Level
///
/// See the [spec](https://www.unicode.org/reports/tr35/tr35-collation.html#Case_Parameters).
/// This is the BCP47 key `kc`. The default is [`CaseLevel::Off`].
///
/// # Unsupported BCP47 options
///
/// Reordering (BCP47 `kr`) currently cannot be set via the API and is implied
/// by the locale of the collation. `kr` is prohibited by ECMA-402.
///
/// Backward second level (BCP47 `kb`) cannot be set via the API and is implied
/// by the locale of the collation (in practice only `fr-CA` turns it on and it's
/// off otherwise). `kb` is prohibited by ECMA-402.
///
/// Normalization is always enabled and cannot be turned off. Therefore, there
/// is no option corresponding to BCP47 `kk`. `kk` is prohibited by ECMA-402.
///
/// Hiragana quaternary handling is part of the strength for the Japanese
/// tailoring. The BCP47 key `kh` is unsupported. `kh` is deprecated and
/// prohibited by ECMA-402.
///
/// Variable top (BCP47 `vt`) is unsupported (use Max Variable instead). `vt`
/// is deprecated and prohibited by ECMA-402.
///
/// ## ECMA-402 Usage
///
/// ECMA-402 `usage: "search"` is represented as `-u-co-search` as part of the
/// locale in ICU4X. However, neither ECMA-402 nor ICU4X provides prefix matching
/// or substring matching API surface. This makes the utility of search collations
/// very narrow: With `-u-co-search`, [`Strength::Primary`], and observing whether
/// comparison output is [`core::cmp::Ordering::Equal`] (making no distinction between
/// [`core::cmp::Ordering::Less`] and [`core::cmp::Ordering::Greater`]), it is
/// possible to check if a set of human-readable strings contains a full-string
/// fuzzy match of a user-entered string, where "fuzzy" means case-insensitive and
/// accent-insensitive for scripts that have such concepts and something roughly
/// similar for other scripts.
///
/// Due to the very limited utility, ICU4X data does not include search collations
/// by default.
#[non_exhaustive]
#[derive(Debug, Copy, Clone, Default)]
pub struct CollatorOptions {
    /// User-specified strength collation option.
    /// `None` leaves the option unset so that the default applies.
    pub strength: Option<Strength>,
    /// User-specified alternate handling collation option.
    /// `None` leaves the option unset so that the default applies.
    pub alternate_handling: Option<AlternateHandling>,
    /// User-specified max variable collation option.
    /// `None` leaves the option unset so that the default applies.
    pub max_variable: Option<MaxVariable>,
    /// User-specified case level collation option.
    /// `None` leaves the option unset so that the default applies.
    pub case_level: Option<CaseLevel>,
}
318
319impl CollatorOptions {
320    /// Create a new `CollatorOptions` with the defaults.
321    pub const fn default() -> Self {
322        Self {
323            strength: None,
324            alternate_handling: None,
325            max_variable: None,
326            case_level: None,
327        }
328    }
329}
330
331// Make it possible to easily copy the resolved options of
332// one collator into another collator.
333impl From<ResolvedCollatorOptions> for CollatorOptions {
334    /// Convenience conversion for copying the options from an
335    /// existing collator into a new one (overriding any locale-provided
336    /// defaults of the new one!).
337    fn from(options: ResolvedCollatorOptions) -> CollatorOptions {
338        Self {
339            strength: Some(options.strength),
340            alternate_handling: Some(options.alternate_handling),
341            max_variable: Some(options.max_variable),
342            case_level: Some(options.case_level),
343        }
344    }
345}
346
347// Make it possible to easily copy the resolved preferences of
348// one collator into another collator.
349impl From<ResolvedCollatorOptions> for CollatorPreferences {
350    /// Convenience conversion for copying the preferences from an
351    /// existing collator into a new one.
352    ///
353    /// Note that some preferences may not be fully preserved when recovering them
354    /// from an already initialized collator e.g [`LocalePreferences`] and [`CollationType`], because
355    /// those are only relevant when loading the collation data.
356    ///
357    /// [`LocalePreferences`]: icu_locale_core::preferences::LocalePreferences
358    /// [`CollationType`]: crate::preferences::CollationType
359    fn from(options: ResolvedCollatorOptions) -> CollatorPreferences {
360        CollatorPreferences {
361            case_first: Some(options.case_first),
362            numeric_ordering: Some(options.numeric),
363            ..Default::default()
364        }
365    }
366}
367
/// The resolved (actually used) options used by the collator.
///
/// Unlike [`CollatorOptions`], every field holds a concrete value rather
/// than an `Option`.
///
/// See the documentation of `CollatorOptions`.
#[non_exhaustive]
#[derive(Debug, Copy, Clone)]
pub struct ResolvedCollatorOptions {
    /// Resolved strength collation option.
    pub strength: Strength,
    /// Resolved alternate handling collation option.
    pub alternate_handling: AlternateHandling,
    /// Resolved case first collation option.
    pub case_first: CollationCaseFirst,
    /// Resolved max variable collation option.
    pub max_variable: MaxVariable,
    /// Resolved case level collation option.
    pub case_level: CaseLevel,
    /// Resolved numeric collation option.
    pub numeric: CollationNumericOrdering,
}
387
388impl From<CollatorOptionsBitField> for ResolvedCollatorOptions {
389    fn from(options: CollatorOptionsBitField) -> ResolvedCollatorOptions {
390        Self {
391            strength: options.strength(),
392            alternate_handling: options.alternate_handling(),
393            case_first: options.case_first(),
394            max_variable: options.max_variable(),
395            case_level: if options.case_level() {
396                CaseLevel::On
397            } else {
398                CaseLevel::Off
399            },
400            numeric: if options.numeric() {
401                CollationNumericOrdering::True
402            } else {
403                CollationNumericOrdering::False
404            },
405            // `options.backward_second_level()` not exposed.
406        }
407    }
408}
409
/// Crate-internal packed representation of the collator options.
///
/// All options live in a single `u32`: the low bits hold the option values
/// and the high bits record which options were set explicitly. The exact
/// bit assignments are documented on the associated mask constants.
#[derive(Copy, Clone, Debug)]
pub(crate) struct CollatorOptionsBitField(u32);
412
413impl Default for CollatorOptionsBitField {
414    fn default() -> Self {
415        Self::default()
416    }
417}
418
419impl CollatorOptionsBitField {
420    /// Bits 0..2 : Strength
421    const STRENGTH_MASK: u32 = 0b111;
422    /// Bits 3..4 : Alternate handling: 00 non-ignorable, 01 shifted,
423    ///             10 reserved for shift-trimmed, 11 reserved for blanked.
424    ///             In other words, bit 4 is currently always 0.
425    const ALTERNATE_HANDLING_MASK: u32 = 1 << 3;
426    /// Bits 5..6 : 2-bit max variable value to be shifted by `MAX_VARIABLE_SHIFT`.
427    const MAX_VARIABLE_MASK: u32 = 0b01100000;
428    const MAX_VARIABLE_SHIFT: u32 = 5;
429    /// Bit     7 : Reserved for extending max variable.
430    /// Bit     8 : Sort uppercase first if case level or case first is on.
431    const UPPER_FIRST_MASK: u32 = 1 << 8;
432    /// Bit     9 : Keep the case bits in the tertiary weight (they trump
433    ///             other tertiary values)
434    ///             unless case level is on (when they are *moved* into the separate case level).
435    ///             By default, the case bits are removed from the tertiary weight (ignored).
436    ///             When CASE_FIRST is off, UPPER_FIRST must be off too, corresponding to
437    ///             the tri-value UCOL_CASE_FIRST attribute: UCOL_OFF vs. UCOL_LOWER_FIRST vs.
438    ///             UCOL_UPPER_FIRST.
439    const CASE_FIRST_MASK: u32 = 1 << 9;
440    /// Bit    10 : Insert the case level between the secondary and tertiary levels.
441    const CASE_LEVEL_MASK: u32 = 1 << 10;
442    /// Bit    11 : Backward secondary level
443    const BACKWARD_SECOND_LEVEL_MASK: u32 = 1 << 11;
444    /// Bit    12 : Numeric
445    const NUMERIC_MASK: u32 = 1 << 12;
446
447    /// Whether strength is explicitly set.
448    const EXPLICIT_STRENGTH_MASK: u32 = 1 << 31;
449    /// Whether max variable is explicitly set.
450    const EXPLICIT_MAX_VARIABLE_MASK: u32 = 1 << 30;
451    /// Whether alternate handling is explicitly set.
452    const EXPLICIT_ALTERNATE_HANDLING_MASK: u32 = 1 << 29;
453    /// Whether case level is explicitly set.
454    const EXPLICIT_CASE_LEVEL_MASK: u32 = 1 << 28;
455    /// Whether case first is explicitly set.
456    const EXPLICIT_CASE_FIRST_MASK: u32 = 1 << 27;
457    /// Whether backward secondary is explicitly set.
458    const EXPLICIT_BACKWARD_SECOND_LEVEL_MASK: u32 = 1 << 26;
459    /// Whether numeric is explicitly set.
460    const EXPLICIT_NUMERIC_MASK: u32 = 1 << 25;
461
462    /// Create a new [`CollatorOptionsBitField`] with the defaults.
463    pub const fn default() -> Self {
464        Self(Strength::Tertiary as u32)
465    }
466
467    /// This is the BCP47 key `ks`.
468    pub fn strength(self) -> Strength {
469        let mut bits = self.0 & CollatorOptionsBitField::STRENGTH_MASK;
470        if !(bits <= 3 || bits == 7) {
471            debug_assert!(false, "Bad value for strength.");
472            // If the bits say higher than `Quaternary` but
473            // lower than `Identical`, clamp to `Quaternary`.
474            bits = 3;
475        }
476        // Safety: Strength is repr(u8) and has discriminants between 0 and 7. The
477        // above code ensures that, since the mask puts us `≤ 8`
478        unsafe { core::mem::transmute(bits as u8) }
479    }
480
481    /// This is the BCP47 key `ks`. See the enum for examples.
482    pub fn set_strength(&mut self, strength: Option<Strength>) {
483        self.0 &= !CollatorOptionsBitField::STRENGTH_MASK;
484        if let Some(strength) = strength {
485            self.0 |= CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK;
486            self.0 |= strength as u32;
487        } else {
488            self.0 &= !CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK;
489        }
490    }
491
492    /// The maximum character class that `AlternateHandling::Shifted`
493    /// applies to.
494    pub fn max_variable(self) -> MaxVariable {
495        // Safe, because we mask two bits and shift them to the low
496        // two bits and the enum has values for 0 to 3, inclusive.
497        unsafe {
498            core::mem::transmute(
499                ((self.0 & CollatorOptionsBitField::MAX_VARIABLE_MASK)
500                    >> CollatorOptionsBitField::MAX_VARIABLE_SHIFT) as u8,
501            )
502        }
503    }
504
505    /// The maximum character class that `AlternateHandling::Shifted`
506    /// applies to. See the enum for examples.
507    pub fn set_max_variable(&mut self, max_variable: Option<MaxVariable>) {
508        self.0 &= !CollatorOptionsBitField::MAX_VARIABLE_MASK;
509        if let Some(max_variable) = max_variable {
510            self.0 |= CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK;
511            self.0 |= (max_variable as u32) << CollatorOptionsBitField::MAX_VARIABLE_SHIFT;
512        } else {
513            self.0 &= !CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK;
514        }
515    }
516
517    /// Whether certain characters are moved from the primary level to
518    /// the quaternary level.
519    pub fn alternate_handling(self) -> AlternateHandling {
520        if (self.0 & CollatorOptionsBitField::ALTERNATE_HANDLING_MASK) != 0 {
521            AlternateHandling::Shifted
522        } else {
523            AlternateHandling::NonIgnorable
524        }
525    }
526
527    /// Whether certain characters are moved from the primary level to
528    /// the quaternary level. See the enum for examples.
529    pub fn set_alternate_handling(&mut self, alternate_handling: Option<AlternateHandling>) {
530        self.0 &= !CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
531        if let Some(alternate_handling) = alternate_handling {
532            self.0 |= CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK;
533            if alternate_handling == AlternateHandling::Shifted {
534                self.0 |= CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
535            }
536        } else {
537            self.0 &= !CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK;
538        }
539    }
540
541    /// Whether there's a dedicated case level.
542    pub fn case_level(self) -> bool {
543        (self.0 & CollatorOptionsBitField::CASE_LEVEL_MASK) != 0
544    }
545
546    /// Whether there's a dedicated case level. If `true`, detaches
547    /// the case aspect of the tertiary level and inserts it between
548    /// the secondary and tertiary levels. Can be combined with the
549    /// primary-only strength. Setting this to `true` with
550    /// `Strength::Primary` corresponds to the ECMA-402 sensitivity
551    /// "case".
552    ///
553    /// See [the ICU guide](https://unicode-org.github.io/icu/userguide/collation/concepts.html#caselevel).
554    pub fn set_case_level(&mut self, case_level: Option<bool>) {
555        self.0 &= !CollatorOptionsBitField::CASE_LEVEL_MASK;
556        if let Some(case_level) = case_level {
557            self.0 |= CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK;
558            if case_level {
559                self.0 |= CollatorOptionsBitField::CASE_LEVEL_MASK;
560            }
561        } else {
562            self.0 &= !CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK;
563        }
564    }
565
566    pub fn set_case_level_from_enum(&mut self, case_level: Option<CaseLevel>) {
567        match case_level {
568            Some(CaseLevel::On) => {
569                self.set_case_level(Some(true));
570            }
571            Some(CaseLevel::Off) => {
572                self.set_case_level(Some(false));
573            }
574            _ => self.set_case_level(None),
575        }
576    }
577
578    fn case_first(self) -> CollationCaseFirst {
579        if (self.0 & CollatorOptionsBitField::CASE_FIRST_MASK) != 0 {
580            if (self.0 & CollatorOptionsBitField::UPPER_FIRST_MASK) != 0 {
581                CollationCaseFirst::Upper
582            } else {
583                CollationCaseFirst::Lower
584            }
585        } else {
586            CollationCaseFirst::False
587        }
588    }
589
590    /// Whether case is the most significant part of the tertiary
591    /// level.
592    ///
593    /// See [the ICU guide](https://unicode-org.github.io/icu/userguide/collation/concepts.html#caselevel).
594    pub fn set_case_first(&mut self, case_first: Option<CollationCaseFirst>) {
595        self.0 &=
596            !(CollatorOptionsBitField::CASE_FIRST_MASK | CollatorOptionsBitField::UPPER_FIRST_MASK);
597        if let Some(case_first) = case_first {
598            self.0 |= CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK;
599            match case_first {
600                CollationCaseFirst::False => {}
601                CollationCaseFirst::Lower => {
602                    self.0 |= CollatorOptionsBitField::CASE_FIRST_MASK;
603                }
604                CollationCaseFirst::Upper => {
605                    self.0 |= CollatorOptionsBitField::CASE_FIRST_MASK;
606                    self.0 |= CollatorOptionsBitField::UPPER_FIRST_MASK;
607                }
608                _ => {
609                    debug_assert!(false, "unknown variant `{case_first:?}`");
610                }
611            }
612        } else {
613            self.0 &= !CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK;
614        }
615    }
616
617    /// Whether second level compares the last accent difference
618    /// instead of the first accent difference.
619    pub fn backward_second_level(self) -> bool {
620        (self.0 & CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK) != 0
621    }
622
623    /// Whether second level compares the last accent difference
624    /// instead of the first accent difference.
625    pub fn set_backward_second_level(&mut self, backward_second_level: Option<bool>) {
626        self.0 &= !CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
627        if let Some(backward_second_level) = backward_second_level {
628            self.0 |= CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK;
629            if backward_second_level {
630                self.0 |= CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
631            }
632        } else {
633            self.0 &= !CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK;
634        }
635    }
636
637    /// Whether sequences of decimal digits are compared according
638    /// to their numeric value.
639    pub fn numeric(self) -> bool {
640        (self.0 & CollatorOptionsBitField::NUMERIC_MASK) != 0
641    }
642
643    /// Whether sequences of decimal digits are compared according
644    /// to their numeric value.
645    pub fn set_numeric(&mut self, numeric: Option<bool>) {
646        self.0 &= !CollatorOptionsBitField::NUMERIC_MASK;
647        if let Some(numeric) = numeric {
648            self.0 |= CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK;
649            if numeric {
650                self.0 |= CollatorOptionsBitField::NUMERIC_MASK;
651            }
652        } else {
653            self.0 &= !CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK;
654        }
655    }
656
657    pub fn set_numeric_from_enum(&mut self, numeric: Option<CollationNumericOrdering>) {
658        match numeric {
659            Some(CollationNumericOrdering::True) => {
660                self.set_numeric(Some(true));
661            }
662            Some(CollationNumericOrdering::False) => {
663                self.set_numeric(Some(false));
664            }
665            Some(_) => {
666                debug_assert!(false, "unknown variant `{numeric:?}`");
667                self.set_numeric(Some(false));
668            }
669            None => self.set_numeric(None),
670        }
671    }
672
673    /// If strength is <= secondary, returns `None`.
674    /// Otherwise, returns the appropriate mask.
675    pub(crate) fn tertiary_mask(self) -> Option<u16> {
676        if self.strength() <= Strength::Secondary {
677            None
678        } else if (self.0
679            & (CollatorOptionsBitField::CASE_FIRST_MASK | CollatorOptionsBitField::CASE_LEVEL_MASK))
680            == CollatorOptionsBitField::CASE_FIRST_MASK
681        {
682            Some(CASE_MASK | TERTIARY_MASK)
683        } else {
684            Some(TERTIARY_MASK)
685        }
686    }
687
688    /// Internal upper first getter
689    pub(crate) fn upper_first(self) -> bool {
690        (self.0 & CollatorOptionsBitField::UPPER_FIRST_MASK) != 0
691    }
692
693    /// For options left as defaults in this `CollatorOptions`,
694    /// set the value from `other`. Values taken from `other`
695    /// are marked as explicitly set if they were explicitly
696    /// set in `other`.
697    pub fn set_defaults(&mut self, other: CollatorOptionsBitField) {
698        if self.0 & CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK == 0 {
699            self.0 &= !CollatorOptionsBitField::STRENGTH_MASK;
700            self.0 |= other.0 & CollatorOptionsBitField::STRENGTH_MASK;
701            self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_STRENGTH_MASK;
702        }
703        if self.0 & CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK == 0 {
704            self.0 &= !CollatorOptionsBitField::MAX_VARIABLE_MASK;
705            self.0 |= other.0 & CollatorOptionsBitField::MAX_VARIABLE_MASK;
706            self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_MAX_VARIABLE_MASK;
707        }
708        if self.0 & CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK == 0 {
709            self.0 &= !CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
710            self.0 |= other.0 & CollatorOptionsBitField::ALTERNATE_HANDLING_MASK;
711            self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_ALTERNATE_HANDLING_MASK;
712        }
713        if self.0 & CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK == 0 {
714            self.0 &= !CollatorOptionsBitField::CASE_LEVEL_MASK;
715            self.0 |= other.0 & CollatorOptionsBitField::CASE_LEVEL_MASK;
716            self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_CASE_LEVEL_MASK;
717        }
718        if self.0 & CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK == 0 {
719            self.0 &= !(CollatorOptionsBitField::CASE_FIRST_MASK
720                | CollatorOptionsBitField::UPPER_FIRST_MASK);
721            self.0 |= other.0
722                & (CollatorOptionsBitField::CASE_FIRST_MASK
723                    | CollatorOptionsBitField::UPPER_FIRST_MASK);
724            self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_CASE_FIRST_MASK;
725        }
726        if self.0 & CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK == 0 {
727            self.0 &= !CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
728            self.0 |= other.0 & CollatorOptionsBitField::BACKWARD_SECOND_LEVEL_MASK;
729            self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_BACKWARD_SECOND_LEVEL_MASK;
730        }
731        if self.0 & CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK == 0 {
732            self.0 &= !CollatorOptionsBitField::NUMERIC_MASK;
733            self.0 |= other.0 & CollatorOptionsBitField::NUMERIC_MASK;
734            self.0 |= other.0 & CollatorOptionsBitField::EXPLICIT_NUMERIC_MASK;
735        }
736    }
737}
738
739impl From<CollatorOptions> for CollatorOptionsBitField {
740    fn from(options: CollatorOptions) -> CollatorOptionsBitField {
741        let mut result = Self::default();
742        result.set_strength(options.strength);
743        result.set_max_variable(options.max_variable);
744        result.set_alternate_handling(options.alternate_handling);
745        result.set_case_level_from_enum(options.case_level);
746        result
747    }
748}