icu_provider_blob/
blob_schema.rs1use core::fmt::Write;
6use icu_provider::{marker::DataMarkerIdHash, prelude::*};
7use serde::Deserialize;
8use writeable::Writeable;
9use zerotrie::ZeroTrieSimpleAscii;
10use zerovec::vecs::{Index16, Index32, VarZeroSlice, VarZeroVecFormat, ZeroSlice};
11
12#[derive(serde::Deserialize, yoke::Yokeable)]
14#[yoke(prove_covariance_manually)]
15#[cfg_attr(feature = "export", derive(serde::Serialize))]
16#[derive(Debug, Clone)]
17pub(crate) enum BlobSchema<'data> {
18 V001(NeverSchema),
19 V002(NeverSchema),
20 V002Bigger(NeverSchema),
21 #[serde(borrow)]
22 V003(BlobSchemaV1<'data, Index16>),
23 #[serde(borrow)]
24 V003Bigger(BlobSchemaV1<'data, Index32>),
25}
26
/// Character written between the locale and the marker attributes when a request
/// is turned into a trie key (and split on again when keys are listed).
// NOTE(review): presumably chosen because a locale string can never contain
// U+001E — confirm against the locale writer.
pub(crate) const REQUEST_SEPARATOR: char = '\x1E';

/// Trie key under which a marker's checksum buffer index is stored. Entries with
/// this key are skipped when listing identifiers.
pub(crate) const CHECKSUM_KEY: &[u8] = b"\0c";
30
31impl<'data> BlobSchema<'data> {
32 pub fn deserialize_and_check<D: serde::Deserializer<'data>>(
33 de: D,
34 ) -> Result<BlobSchema<'data>, D::Error> {
35 let blob = Self::deserialize(de)?;
36 #[cfg(debug_assertions)]
37 blob.check_invariants();
38 Ok(blob)
39 }
40
41 pub fn load(
42 &self,
43 marker: DataMarkerInfo,
44 req: DataRequest,
45 ) -> Result<(&'data [u8], Option<u64>), DataError> {
46 match self {
47 BlobSchema::V001(..) | BlobSchema::V002(..) | BlobSchema::V002Bigger(..) => {
48 unreachable!("Unreachable blob schema")
49 }
50 BlobSchema::V003(s) => s.load(marker, req),
51 BlobSchema::V003Bigger(s) => s.load(marker, req),
52 }
53 }
54
55 #[cfg(feature = "alloc")]
56 pub fn iter_ids(
57 &self,
58 marker: DataMarkerInfo,
59 ) -> Result<alloc::collections::BTreeSet<DataIdentifierCow>, DataError> {
60 match self {
61 BlobSchema::V001(..) | BlobSchema::V002(..) | BlobSchema::V002Bigger(..) => {
62 unreachable!("Unreachable blob schema")
63 }
64 BlobSchema::V003(s) => s.iter_ids(marker),
65 BlobSchema::V003Bigger(s) => s.iter_ids(marker),
66 }
67 }
68
69 #[cfg(debug_assertions)]
70 fn check_invariants(&self) {
71 match self {
72 BlobSchema::V001(..) | BlobSchema::V002(..) | BlobSchema::V002Bigger(..) => (),
73 BlobSchema::V003(s) => s.check_invariants(),
74 BlobSchema::V003Bigger(s) => s.check_invariants(),
75 }
76 }
77}
78
79#[cfg_attr(feature = "export", derive(serde::Serialize))]
80#[derive(Debug, Clone, yoke::Yokeable)]
81pub enum NeverSchema {}
82
83impl<'de> serde::Deserialize<'de> for NeverSchema {
84 fn deserialize<D>(_: D) -> Result<Self, D::Error>
85 where
86 D: serde::Deserializer<'de>,
87 {
88 use serde::de::Error;
89 Err(D::Error::custom("Attempted to read 1.0 blob format from ICU4X 2.0: please run ICU4X 2.0 datagen to generate a new file."))
90 }
91}
92#[derive(Clone, Copy, Debug, serde::Deserialize, yoke::Yokeable)]
98#[yoke(prove_covariance_manually)]
99#[cfg_attr(feature = "export", derive(serde::Serialize))]
100#[serde(bound = "")] pub(crate) struct BlobSchemaV1<'data, LocaleVecFormat: VarZeroVecFormat> {
102 #[serde(borrow)]
105 pub markers: &'data ZeroSlice<DataMarkerIdHash>,
106 #[serde(borrow)]
112 pub locales: &'data VarZeroSlice<[u8], LocaleVecFormat>,
113 #[serde(borrow)]
115 pub buffers: &'data VarZeroSlice<[u8], Index32>,
116}
117
118impl<LocaleVecFormat: VarZeroVecFormat> Default for BlobSchemaV1<'_, LocaleVecFormat> {
119 fn default() -> Self {
120 Self {
121 markers: ZeroSlice::new_empty(),
122 locales: VarZeroSlice::new_empty(),
123 buffers: VarZeroSlice::new_empty(),
124 }
125 }
126}
127
128impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV1<'data, LocaleVecFormat> {
129 pub fn load(
130 &self,
131 marker: DataMarkerInfo,
132 req: DataRequest,
133 ) -> Result<(&'data [u8], Option<u64>), DataError> {
134 if marker.is_singleton && !req.id.locale.is_unknown() {
135 return Err(DataErrorKind::InvalidRequest.with_req(marker, req));
136 }
137 let marker_index = self
138 .markers
139 .binary_search(&marker.id.hashed())
140 .ok()
141 .ok_or_else(|| DataErrorKind::MarkerNotFound.with_req(marker, req))?;
142 let zerotrie = self
143 .locales
144 .get(marker_index)
145 .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(marker, req))?;
146 let mut cursor = ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor();
147 let _infallible_ascii = req.id.locale.write_to(&mut cursor);
148 let blob_index = if !req.id.marker_attributes.is_empty() {
149 let _infallible_ascii = cursor.write_char(REQUEST_SEPARATOR);
150 req.id
151 .marker_attributes
152 .write_to(&mut cursor)
153 .map_err(|_| DataErrorKind::IdentifierNotFound.with_req(marker, req))?;
154 loop {
155 if let Some(v) = cursor.take_value() {
156 break Some(v);
157 }
158 if !req.metadata.attributes_prefix_match || cursor.probe(0).is_none() {
159 break None;
160 }
161 }
162 } else {
163 cursor.take_value()
164 }
165 .ok_or_else(|| DataErrorKind::IdentifierNotFound.with_req(marker, req))?;
166 let buffer = self
167 .buffers
168 .get(blob_index)
169 .ok_or_else(|| DataError::custom("Invalid blob bytes").with_req(marker, req))?;
170 Ok((
171 buffer,
172 marker
173 .has_checksum
174 .then(|| self.get_checksum(zerotrie))
175 .flatten(),
176 ))
177 }
178
179 fn get_checksum(&self, zerotrie: &[u8]) -> Option<u64> {
180 ZeroTrieSimpleAscii::from_store(zerotrie)
181 .get(CHECKSUM_KEY)
182 .and_then(|cs| Some(u64::from_le_bytes(self.buffers.get(cs)?.try_into().ok()?)))
183 }
184
185 #[cfg(feature = "alloc")]
186 pub fn iter_ids(
187 &self,
188 marker: DataMarkerInfo,
189 ) -> Result<alloc::collections::BTreeSet<DataIdentifierCow>, DataError> {
190 let marker_index = self
191 .markers
192 .binary_search(&marker.id.hashed())
193 .ok()
194 .ok_or_else(|| DataErrorKind::MarkerNotFound.with_marker(marker))?;
195 let zerotrie = self
196 .locales
197 .get(marker_index)
198 .ok_or_else(|| DataError::custom("Invalid blob bytes").with_marker(marker))?;
199 Ok(ZeroTrieSimpleAscii::from_store(zerotrie)
200 .iter()
201 .filter_map(|(s, _)| {
202 #[allow(unused_imports)]
203 use alloc::borrow::ToOwned;
204 if let Some((locale, attrs)) = s.split_once(REQUEST_SEPARATOR) {
205 Some(DataIdentifierCow::from_owned(
206 DataMarkerAttributes::try_from_str(attrs).ok()?.to_owned(),
207 locale.parse().ok()?,
208 ))
209 } else if s.as_bytes() == CHECKSUM_KEY {
210 None
211 } else {
212 Some(DataIdentifierCow::from_locale(s.parse().ok()?))
213 }
214 })
215 .collect())
216 }
217
218 #[cfg(debug_assertions)]
220 fn check_invariants(&self) {
221 if self.markers.is_empty() && self.locales.is_empty() && self.buffers.is_empty() {
222 return;
223 }
224 debug_assert_eq!(self.markers.len(), self.locales.len());
225 let mut seen_min = self.buffers.is_empty();
228 let mut seen_max = self.buffers.is_empty();
229 for zerotrie in self.locales.iter() {
230 for (_locale, idx) in ZeroTrieSimpleAscii::from_store(zerotrie).iter() {
231 debug_assert!(idx < self.buffers.len());
232 if idx == 0 {
233 seen_min = true;
234 }
235 if idx + 1 == self.buffers.len() {
236 seen_max = true;
237 }
238 }
239 }
240 debug_assert!(seen_min);
241 debug_assert!(seen_max);
242 }
243}