icu_collections/codepointtrie/
toml.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Utilities for reading CodePointTrie data from TOML files.
6
7use crate::codepointtrie::error::Error;
8use crate::codepointtrie::CodePointTrie;
9use crate::codepointtrie::CodePointTrieHeader;
10use crate::codepointtrie::TrieType;
11use crate::codepointtrie::TrieValue;
12use alloc::string::String;
13use alloc::vec::Vec;
14use core::convert::TryFrom;
15use zerovec::ZeroVec;
16
17/// A Serde-compatible struct for reading serialized [`CodePointTrie`] TOML files
18/// generated by ICU4C.
19///
20/// Use `TryInto` to convert [`CodePointTrieToml`] to a proper [`CodePointTrie`].
21#[derive(serde::Deserialize)]
22pub struct CodePointTrieToml {
23    #[serde(skip)]
24    _short_name: String,
25    #[serde(skip)]
26    _long_name: String,
27    #[serde(skip)]
28    _name: String,
29    index: Vec<u16>,
30    data_8: Option<Vec<u8>>,
31    data_16: Option<Vec<u16>>,
32    data_32: Option<Vec<u32>>,
33    #[serde(skip)]
34    _index_length: u32,
35    #[serde(skip)]
36    _data_length: u32,
37    #[serde(rename = "highStart")]
38    high_start: u32,
39    #[serde(rename = "shifted12HighStart")]
40    shifted12_high_start: u16,
41    #[serde(rename = "type")]
42    trie_type_enum_val: u8,
43    #[serde(rename = "valueWidth")]
44    _value_width_enum_val: u8,
45    #[serde(rename = "index3NullOffset")]
46    index3_null_offset: u16,
47    #[serde(rename = "dataNullOffset")]
48    data_null_offset: u32,
49    #[serde(rename = "nullValue")]
50    null_value: u32,
51}
52
/// Data slice from a [`CodePointTrie`] TOML.
///
/// ICU4C exports data as either `u8`, `u16`, or `u32`, which may be converted
/// to other types as appropriate.
///
/// Each variant only borrows its slice, so values of this type are cheap to
/// copy and can be compared and debug-printed directly.
#[allow(clippy::exhaustive_enums)] // based on a stable serialized form
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CodePointDataSlice<'a> {
    /// A serialized [`CodePointTrie`] data array of 8-bit values.
    U8(&'a [u8]),
    /// A serialized [`CodePointTrie`] data array of 16-bit values.
    U16(&'a [u16]),
    /// A serialized [`CodePointTrie`] data array of 32-bit values.
    U32(&'a [u32]),
}
66
67impl CodePointTrieToml {
68    /// Gets the `index` slice.
69    pub fn index_slice(&self) -> &[u16] {
70        self.index.as_slice()
71    }
72
73    /// Gets the `data` slice.
74    pub fn data_slice(&self) -> Result<CodePointDataSlice<'_>, Error> {
75        if let Some(data_8) = &self.data_8 {
76            Ok(CodePointDataSlice::U8(data_8.as_slice()))
77        } else if let Some(data_16) = &self.data_16 {
78            Ok(CodePointDataSlice::U16(data_16.as_slice()))
79        } else if let Some(data_32) = &self.data_32 {
80            Ok(CodePointDataSlice::U32(data_32.as_slice()))
81        } else {
82            Err(Error::FromDeserialized {
83                reason: "Did not find data array for CodePointTrie in TOML",
84            })
85        }
86    }
87}
88
89impl TryFrom<&CodePointTrieToml> for CodePointTrieHeader {
90    type Error = Error;
91
92    fn try_from(cpt_data: &CodePointTrieToml) -> Result<Self, Self::Error> {
93        let trie_type_enum: TrieType = TrieType::try_from(cpt_data.trie_type_enum_val)?;
94        Ok(CodePointTrieHeader {
95            high_start: cpt_data.high_start,
96            shifted12_high_start: cpt_data.shifted12_high_start,
97            index3_null_offset: cpt_data.index3_null_offset,
98            data_null_offset: cpt_data.data_null_offset,
99            null_value: cpt_data.null_value,
100            trie_type: trie_type_enum,
101        })
102    }
103}
104
105impl<T: TrieValue> TryFrom<&CodePointTrieToml> for CodePointTrie<'static, T> {
106    type Error = Error;
107
108    fn try_from(cpt_data: &CodePointTrieToml) -> Result<CodePointTrie<'static, T>, Self::Error> {
109        use CodePointDataSlice::*;
110        let header = CodePointTrieHeader::try_from(cpt_data)?;
111        let index: ZeroVec<u16> = ZeroVec::alloc_from_slice(&cpt_data.index);
112        let data: Result<ZeroVec<'static, T>, T::TryFromU32Error> = match cpt_data.data_slice()? {
113            U8(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
114            U16(s) => s.iter().map(|i| T::try_from_u32(*i as u32)).collect(),
115            U32(s) => s.iter().map(|i| T::try_from_u32(*i)).collect(),
116        };
117
118        let data = data.map_err(|_| Error::FromDeserialized {
119            reason: "Could not parse data array to typed array",
120        })?;
121        CodePointTrie::<T>::try_new(header, index, data)
122    }
123}