1use core::cmp::Ordering;
6use core::fmt;
7
/// A 24-bit value that is *expected* to be a Unicode scalar value, but is not
/// validated as one.
///
/// The three bytes hold the value in little-endian order (least-significant
/// byte first). Use [`PotentialCodePoint::try_to_char`] to obtain a validated
/// `char`.
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // transparent newtype around a fixed-size byte array
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct PotentialCodePoint([u8; 3]);
36
37impl PotentialCodePoint {
38 #[inline]
49 pub const fn from_char(c: char) -> Self {
50 let [u0, u1, u2, _u3] = (c as u32).to_le_bytes();
51 Self([u0, u1, u2])
52 }
53
54 #[inline]
56 pub const fn from_u24(c: u32) -> Self {
57 let [u0, u1, u2, _u3] = c.to_le_bytes();
58 Self([u0, u1, u2])
59 }
60
61 #[inline]
76 pub fn try_to_char(self) -> Result<char, core::char::CharTryFromError> {
77 char::try_from(u32::from(self))
78 }
79
80 #[inline]
93 pub fn to_char_lossy(self) -> char {
94 self.try_to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
95 }
96
97 #[inline]
113 pub unsafe fn to_char_unchecked(self) -> char {
114 char::from_u32_unchecked(u32::from(self))
115 }
116
117 #[inline]
121 #[cfg(feature = "zerovec")]
122 pub const fn to_unaligned(self) -> zerovec::ule::RawBytesULE<3> {
123 zerovec::ule::RawBytesULE(self.0)
124 }
125}
126
/// Lets [`PotentialCodePoint`] be stored in `zerovec` containers, using
/// `RawBytesULE<3>` as its unaligned representation.
///
/// This impl requires enabling the optional `zerovec` Cargo feature.
#[cfg(feature = "zerovec")]
impl zerovec::ule::AsULE for PotentialCodePoint {
    type ULE = zerovec::ule::RawBytesULE<3>;

    /// Wraps the three stored bytes, unchanged, in the ULE type.
    #[inline]
    fn to_unaligned(self) -> Self::ULE {
        let Self(bytes) = self;
        zerovec::ule::RawBytesULE(bytes)
    }

    /// Takes the three bytes out of the ULE value, unchanged and unvalidated.
    #[inline]
    fn from_unaligned(unaligned: Self::ULE) -> Self {
        let zerovec::ule::RawBytesULE(bytes) = unaligned;
        Self(bytes)
    }
}
142
// This impl requires enabling the optional `zerovec` Cargo feature.
//
// SAFETY: `PotentialCodePoint` is `#[repr(transparent)]` over `[u8; 3]`, and its
// `AsULE` conversions move those three bytes to and from `RawBytesULE<3>`
// unchanged, so the two representations hold the same bytes.
// NOTE(review): confirm this matches the exact contract documented on
// `zerovec::ule::EqULE`.
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::EqULE for PotentialCodePoint {}
148
149impl fmt::Debug for PotentialCodePoint {
150 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
151 match self.try_to_char() {
153 Ok(c) => fmt::Debug::fmt(&c, f),
154 Err(_) => fmt::Debug::fmt(&self.0, f),
155 }
156 }
157}
158
159impl PartialOrd for PotentialCodePoint {
160 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
161 Some(self.cmp(other))
162 }
163}
164
165impl PartialEq<char> for PotentialCodePoint {
166 fn eq(&self, other: &char) -> bool {
167 self.eq(&Self::from_char(*other))
168 }
169}
170
171impl PartialOrd<char> for PotentialCodePoint {
172 fn partial_cmp(&self, other: &char) -> Option<Ordering> {
173 self.partial_cmp(&Self::from_char(*other))
174 }
175}
176
177impl PartialEq<PotentialCodePoint> for char {
178 fn eq(&self, other: &PotentialCodePoint) -> bool {
179 PotentialCodePoint::from_char(*self).eq(other)
180 }
181}
182
183impl PartialOrd<PotentialCodePoint> for char {
184 fn partial_cmp(&self, other: &PotentialCodePoint) -> Option<Ordering> {
185 PotentialCodePoint::from_char(*self).partial_cmp(other)
186 }
187}
188
189impl Ord for PotentialCodePoint {
190 fn cmp(&self, other: &Self) -> Ordering {
192 let a = u32::from(*self);
193 let b = u32::from(*other);
194 a.cmp(&b)
195 }
196}
197
198impl From<PotentialCodePoint> for u32 {
199 fn from(x: PotentialCodePoint) -> Self {
200 let [a0, a1, a2] = x.0;
201 u32::from_le_bytes([a0, a1, a2, 0])
202 }
203}
204
205impl TryFrom<u32> for PotentialCodePoint {
206 type Error = ();
207 fn try_from(x: u32) -> Result<Self, ()> {
208 let [u0, u1, u2, u3] = x.to_le_bytes();
209 if u3 != 0 {
210 return Err(());
211 }
212 Ok(Self([u0, u1, u2]))
213 }
214}
215
/// Converts a `char` into a [`PotentialCodePoint`]; equivalent to calling
/// [`PotentialCodePoint::from_char`].
impl From<char> for PotentialCodePoint {
    #[inline]
    fn from(value: char) -> Self {
        Self::from_char(value)
    }
}
222
/// Fallible conversion to a `char`; equivalent to calling
/// [`PotentialCodePoint::try_to_char`].
impl TryFrom<PotentialCodePoint> for char {
    /// The error produced by the underlying `u32` to `char` conversion.
    type Error = core::char::CharTryFromError;

    #[inline]
    fn try_from(value: PotentialCodePoint) -> Result<char, Self::Error> {
        value.try_to_char()
    }
}
231
/// Serializes as a `char` in human-readable formats and as the raw three-byte
/// array otherwise.
///
/// In both cases serialization fails with a custom error if the stored value
/// is not a valid Unicode scalar value.
///
/// This impl requires enabling the optional `serde` Cargo feature.
#[cfg(feature = "serde")]
impl serde::Serialize for PotentialCodePoint {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // Validate up front so that both output formats reject invalid values.
        let scalar = match self.try_to_char() {
            Ok(scalar) => scalar,
            Err(_) => {
                return Err(<S::Error as serde::ser::Error>::custom(
                    "invalid Unicode scalar value in PotentialCodePoint",
                ))
            }
        };
        if !serializer.is_human_readable() {
            // Binary formats get the stored bytes as-is.
            return serde::Serialize::serialize(&self.0, serializer);
        }
        serializer.serialize_char(scalar)
    }
}
250
/// Deserializes from a `char` in human-readable formats and from a raw
/// three-byte array otherwise.
///
/// The raw bytes are stored as read, without checking that they form a valid
/// Unicode scalar value.
///
/// This impl requires enabling the optional `serde` Cargo feature.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for PotentialCodePoint {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Binary formats carry the three bytes directly.
            return <[u8; 3] as serde::Deserialize>::deserialize(deserializer)
                .map(PotentialCodePoint);
        }
        <char as serde::Deserialize>::deserialize(deserializer).map(PotentialCodePoint::from_char)
    }
}
267
/// Emits tokens that rebuild this value: `from_char` when it is a valid
/// `char`, `from_u24` with the numeric value otherwise.
///
/// This impl requires enabling the optional `databake` Cargo feature.
#[cfg(feature = "databake")]
impl databake::Bake for PotentialCodePoint {
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        // Both forms of generated code refer to the `potential_utf` crate.
        env.insert("potential_utf");
        if let Ok(ch) = self.try_to_char() {
            let ch = ch.bake(env);
            databake::quote! {
                potential_utf::PotentialCodePoint::from_char(#ch)
            }
        } else {
            // Not representable as a `char`; bake the raw 24-bit value instead.
            let u24 = u32::from(*self);
            databake::quote! {
                potential_utf::PotentialCodePoint::from_u24(#u24)
            }
        }
    }
}
290
#[cfg(test)]
mod test {
    use super::*;
    use zerovec::ZeroVec;

    /// Values that are not Unicode scalar values must fail to serialize in both
    /// the human-readable (JSON) and the binary (bincode) format.
    #[test]
    fn test_serde_fail() {
        let invalid = PotentialCodePoint([0xFF, 0xFF, 0xFF]);
        serde_json::to_string(&invalid).expect_err("serialize invalid char bytes");
        bincode::serialize(&invalid).expect_err("serialize invalid char bytes");
    }

    /// JSON round trip: the value is written as a one-character string.
    #[test]
    fn test_serde_json() {
        let original = PotentialCodePoint::from_char('🙃');

        let json = serde_json::to_string(&original).unwrap();
        assert_eq!(json, r#""🙃""#);

        let restored: PotentialCodePoint = serde_json::from_str(&json).unwrap();
        assert_eq!(original, restored);
    }

    /// Bincode round trip: the value is written as its three little-endian bytes.
    #[test]
    fn test_serde_bincode() {
        let original = PotentialCodePoint::from_char('🙃');

        let encoded = bincode::serialize(&original).unwrap();
        assert_eq!(encoded, [0x43, 0xF6, 0x01]);

        let restored: PotentialCodePoint = bincode::deserialize(&encoded).unwrap();
        assert_eq!(original, restored);
    }

    /// The in-memory bytes of a slice of `PotentialCodePoint` must match the
    /// bytes of the corresponding `ZeroVec`.
    #[test]
    fn test_representation() {
        let chars = ['w', 'ω', '文', '𑄃', '🙃'];

        let code_points: Vec<_> = chars
            .iter()
            .map(|&c| PotentialCodePoint::from_char(c))
            .collect();
        let zvec: ZeroVec<_> = code_points.iter().copied().collect();

        let ule_bytes = zvec.as_bytes();
        // SAFETY: `PotentialCodePoint` is `#[repr(transparent)]` over `[u8; 3]`, so
        // `code_points` is backed by `3 * code_points.len()` initialized bytes.
        // `ule_bytes` is expected to have exactly that length (three bytes per
        // element of `zvec`), and `code_points` outlives the borrowed slice.
        let raw_bytes: &[u8] = unsafe {
            core::slice::from_raw_parts(code_points.as_ptr().cast::<u8>(), ule_bytes.len())
        };

        // The native and the ULE representations are byte-for-byte identical...
        assert_eq!(raw_bytes, ule_bytes);

        // ...and hold each scalar value as three little-endian bytes.
        assert_eq!(
            &[119, 0, 0, 201, 3, 0, 135, 101, 0, 3, 17, 1, 67, 246, 1],
            ule_bytes
        );
    }

    /// Baking produces `from_char` for valid scalar values and `from_u24` for
    /// anything else (here a lone surrogate, 0xD800 = 55296).
    #[test]
    fn test_char_bake() {
        databake::test_bake!(
            PotentialCodePoint,
            const,
            crate::PotentialCodePoint::from_char('b'),
            potential_utf
        );
        databake::test_bake!(
            PotentialCodePoint,
            const,
            crate::PotentialCodePoint::from_u24(55296u32),
            potential_utf
        );
    }
}