icu_collections/codepointtrie/
toml.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Utilities for reading CodePointTrie data from TOML files.
6
7use crate::codepointtrie::error::Error;
8use crate::codepointtrie::CodePointTrie;
9use crate::codepointtrie::CodePointTrieHeader;
10use crate::codepointtrie::TrieType;
11use crate::codepointtrie::TrieValue;
12use alloc::string::String;
13use alloc::vec::Vec;
14use core::convert::TryFrom;
15use zerovec::ZeroVec;
16
17/// A Serde-compatible struct for reading serialized [`CodePointTrie`] TOML files
18/// generated by ICU4C.
19///
20/// Use `TryInto` to convert [`CodePointTrieToml`] to a proper [`CodePointTrie`].
21#[allow(clippy::upper_case_acronyms)]
22#[derive(serde::Deserialize)]
23pub struct CodePointTrieToml {
24    #[serde(skip)]
25    _short_name: String,
26    #[serde(skip)]
27    _long_name: String,
28    #[serde(skip)]
29    _name: String,
30    index: Vec<u16>,
31    data_8: Option<Vec<u8>>,
32    data_16: Option<Vec<u16>>,
33    data_32: Option<Vec<u32>>,
34    #[serde(skip)]
35    _index_length: u32,
36    #[serde(skip)]
37    _data_length: u32,
38    #[serde(rename = "highStart")]
39    high_start: u32,
40    #[serde(rename = "shifted12HighStart")]
41    shifted12_high_start: u16,
42    #[serde(rename = "type")]
43    trie_type_enum_val: u8,
44    #[serde(rename = "valueWidth")]
45    _value_width_enum_val: u8,
46    #[serde(rename = "index3NullOffset")]
47    index3_null_offset: u16,
48    #[serde(rename = "dataNullOffset")]
49    data_null_offset: u32,
50    #[serde(rename = "nullValue")]
51    null_value: u32,
52}
53
/// Data slice from a [`CodePointTrie`] TOML.
///
/// ICU4C exports data as either `u8`, `u16`, or `u32`, which may be converted
/// to other types as appropriate.
///
/// Each variant only borrows its slice, so values of this type are cheap to copy,
/// and can be compared and debug-printed.
#[allow(clippy::exhaustive_enums)] // based on a stable serialized form
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodePointDataSlice<'a> {
    /// A serialized [`CodePointTrie`] data array of 8-bit values.
    U8(&'a [u8]),
    /// A serialized [`CodePointTrie`] data array of 16-bit values.
    U16(&'a [u16]),
    /// A serialized [`CodePointTrie`] data array of 32-bit values.
    U32(&'a [u32]),
}
67
68impl CodePointTrieToml {
69    /// Gets the `index` slice.
70    pub fn index_slice(&self) -> &[u16] {
71        self.index.as_slice()
72    }
73
74    /// Gets the `data` slice.
75    pub fn data_slice(&self) -> Result<CodePointDataSlice, Error> {
76        if let Some(data_8) = &self.data_8 {
77            Ok(CodePointDataSlice::U8(data_8.as_slice()))
78        } else if let Some(data_16) = &self.data_16 {
79            Ok(CodePointDataSlice::U16(data_16.as_slice()))
80        } else if let Some(data_32) = &self.data_32 {
81            Ok(CodePointDataSlice::U32(data_32.as_slice()))
82        } else {
83            Err(Error::FromDeserialized {
84                reason: "Did not find data array for CodePointTrie in TOML",
85            })
86        }
87    }
88}
89
90impl TryFrom<&CodePointTrieToml> for CodePointTrieHeader {
91    type Error = Error;
92
93    fn try_from(cpt_data: &CodePointTrieToml) -> Result<Self, Self::Error> {
94        let trie_type_enum: TrieType = TrieType::try_from(cpt_data.trie_type_enum_val)?;
95        Ok(CodePointTrieHeader {
96            high_start: cpt_data.high_start,
97            shifted12_high_start: cpt_data.shifted12_high_start,
98            index3_null_offset: cpt_data.index3_null_offset,
99            data_null_offset: cpt_data.data_null_offset,
100            null_value: cpt_data.null_value,
101            trie_type: trie_type_enum,
102        })
103    }
104}
105
106impl<T: TrieValue> TryFrom<&CodePointTrieToml> for CodePointTrie<'static, T> {
107    type Error = Error;
108
109    fn try_from(cpt_data: &CodePointTrieToml) -> Result<CodePointTrie<'static, T>, Self::Error> {
110        use CodePointDataSlice::*;
111        let header = CodePointTrieHeader::try_from(cpt_data)?;
112        let index: ZeroVec<u16> = ZeroVec::alloc_from_slice(&cpt_data.index);
113        let data: Result<ZeroVec<'static, T>, T::TryFromU32Error> = match cpt_data.data_slice()? {
114            U8(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
115            U16(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
116            U32(s) => s.iter().map(|i| T::try_from_u32(*i)).collect(),
117        };
118
119        let data = data.map_err(|_| Error::FromDeserialized {
120            reason: "Could not parse data array to typed array",
121        })?;
122        CodePointTrie::<T>::try_new(header, index, data)
123    }
124}