icu_properties/
bidi.rs

// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::{props::EnumeratedProperty, provider::PropertyEnumBidiMirroringGlyphV1};
6use icu_collections::codepointtrie::TrieValue;
7use zerovec::ule::{AsULE, RawBytesULE};
8
9/// This is a bitpacked combination of the `Bidi_Mirroring_Glyph`,
10/// `Bidi_Mirrored`, and `Bidi_Paired_Bracket_Type` properties.
11#[derive(Debug, Eq, PartialEq, Clone, Copy, Default)]
12#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
13#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
14#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
15#[allow(clippy::exhaustive_structs)] // needed for baked construction
16pub struct BidiMirroringGlyph {
17    /// The mirroring glyph
18    pub mirroring_glyph: Option<char>,
19    /// Whether the glyph is mirrored
20    pub mirrored: bool,
21    /// The paired bracket type
22    pub paired_bracket_type: BidiPairedBracketType,
23}
24
25impl EnumeratedProperty for BidiMirroringGlyph {
26    type DataMarker = PropertyEnumBidiMirroringGlyphV1;
27    #[cfg(feature = "compiled_data")]
28    const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
29        crate::provider::Baked::SINGLETON_PROPERTY_ENUM_BIDI_MIRRORING_GLYPH_V1;
30    const NAME: &'static [u8] = b"Bidi_Mirroring_Glyph";
31    const SHORT_NAME: &'static [u8] = b"Bidi_Mirroring_Glyph";
32}
33
34impl crate::private::Sealed for BidiMirroringGlyph {}
35
36impl AsULE for BidiMirroringGlyph {
37    type ULE = zerovec::ule::RawBytesULE<3>;
38
39    fn to_unaligned(self) -> Self::ULE {
40        let [a, b, c, _] = TrieValue::to_u32(self).to_le_bytes();
41        RawBytesULE([a, b, c])
42    }
43    fn from_unaligned(unaligned: Self::ULE) -> Self {
44        let [a, b, c] = unaligned.0;
45        TrieValue::try_from_u32(u32::from_le_bytes([a, b, c, 0])).unwrap_or_default()
46    }
47}
48
/// The `Bidi_Paired_Bracket_Type` value of a code point.
///
/// This enum does not implement [`EnumeratedProperty`]; instead, read it from
/// the `paired_bracket_type` field of the bitpacked [`BidiMirroringGlyph`] property.
///
/// If you have a use case for this property that does not also need the
/// [`BidiMirroringGlyph`] property, and you need to optimize data size,
/// please file an issue.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[non_exhaustive]
pub enum BidiPairedBracketType {
    /// `Bidi_Paired_Bracket_Type=Open`.
    Open,
    /// `Bidi_Paired_Bracket_Type=Close`.
    Close,
    /// `Bidi_Paired_Bracket_Type=None`; this is also the [`Default`] value.
    #[default]
    None,
}
70
/// Allows a [`CodePointMapDataBorrowed<BidiClass>`](crate::CodePointMapDataBorrowed)
/// to be used as a [`unicode_bidi::BidiDataSource`].
///
/// ✨ *Enabled with the `unicode_bidi` Cargo feature.*
///
/// # Examples
///
/// ```
/// use icu::properties::CodePointMapData;
/// use icu::properties::props::BidiClass;
/// use unicode_bidi::BidiInfo;
///
/// // This example text is defined using `concat!` because some browsers
/// // and text editors have trouble displaying bidi strings.
/// let text =  concat!["א", // RTL#1
///                     "ב", // RTL#2
///                     "ג", // RTL#3
///                     "a", // LTR#1
///                     "b", // LTR#2
///                     "c", // LTR#3
///                     ]; //
///
///
/// let bidi_map = CodePointMapData::<BidiClass>::new();
///
/// // Resolve embedding levels within the text.  Pass `None` to detect the
/// // paragraph level automatically.
/// let bidi_info = BidiInfo::new_with_data_source(&bidi_map, text, None);
///
/// // This paragraph has embedding level 1 because its first strong character is RTL.
/// assert_eq!(bidi_info.paragraphs.len(), 1);
/// let para = &bidi_info.paragraphs[0];
/// assert_eq!(para.level.number(), 1);
/// assert!(para.level.is_rtl());
///
/// // Re-ordering is done after wrapping each paragraph into a sequence of
/// // lines. This example uses a single line that spans the entire paragraph.
/// let line = para.range.clone();
///
/// let display = bidi_info.reorder_line(para, line);
/// assert_eq!(display, concat!["a", // LTR#1
///                             "b", // LTR#2
///                             "c", // LTR#3
///                             "ג", // RTL#3
///                             "ב", // RTL#2
///                             "א", // RTL#1
///                             ]);
/// ```
#[cfg(feature = "unicode_bidi")]
impl unicode_bidi::data_source::BidiDataSource
    for crate::CodePointMapDataBorrowed<'_, crate::props::BidiClass>
{
    /// Looks up the `Bidi_Class` of `c` in this map and translates it to the
    /// corresponding `unicode_bidi::BidiClass` variant.
    ///
    /// A value that matches none of the listed classes is reported as
    /// `unicode_bidi::BidiClass::ON`.
    fn bidi_class(&self, c: char) -> unicode_bidi::BidiClass {
        // Short aliases keep the two identically named types apart.
        use crate::props::BidiClass as Icu;
        use unicode_bidi::BidiClass as Ub;

        let class = self.get(c);
        match class {
            Icu::LeftToRight => Ub::L,
            Icu::RightToLeft => Ub::R,
            Icu::EuropeanNumber => Ub::EN,
            Icu::EuropeanSeparator => Ub::ES,
            Icu::EuropeanTerminator => Ub::ET,
            Icu::ArabicNumber => Ub::AN,
            Icu::CommonSeparator => Ub::CS,
            Icu::ParagraphSeparator => Ub::B,
            Icu::SegmentSeparator => Ub::S,
            Icu::WhiteSpace => Ub::WS,
            Icu::OtherNeutral => Ub::ON,
            Icu::LeftToRightEmbedding => Ub::LRE,
            Icu::LeftToRightOverride => Ub::LRO,
            Icu::ArabicLetter => Ub::AL,
            Icu::RightToLeftEmbedding => Ub::RLE,
            Icu::RightToLeftOverride => Ub::RLO,
            Icu::PopDirectionalFormat => Ub::PDF,
            Icu::NonspacingMark => Ub::NSM,
            Icu::BoundaryNeutral => Ub::BN,
            Icu::FirstStrongIsolate => Ub::FSI,
            Icu::LeftToRightIsolate => Ub::LRI,
            Icu::RightToLeftIsolate => Ub::RLI,
            Icu::PopDirectionalIsolate => Ub::PDI,
            // This must not happen; fall back to Other_Neutral rather than panic.
            _ => Ub::ON,
        }
    }
}