bytestring/
lib.rs

1//! A UTF-8 encoded read-only string using `Bytes` as storage.
2//!
3//! See docs for [`ByteString`].
4
5#![no_std]
6
7extern crate alloc;
8
9use alloc::{
10    boxed::Box,
11    string::{String, ToString},
12    vec::Vec,
13};
14use core::{borrow::Borrow, fmt, hash, ops, str};
15
16use bytes::Bytes;
17
18/// An immutable UTF-8 encoded string using [`Bytes`] as the storage.
19#[derive(Clone, Default, Eq, PartialOrd, Ord)]
20pub struct ByteString(Bytes);
21
22impl ByteString {
23    /// Creates a new empty `ByteString`.
24    pub const fn new() -> Self {
25        ByteString(Bytes::new())
26    }
27
28    /// Get a reference to the underlying `Bytes` object.
29    pub fn as_bytes(&self) -> &Bytes {
30        &self.0
31    }
32
33    /// Unwraps this `ByteString` into the underlying `Bytes` object.
34    pub fn into_bytes(self) -> Bytes {
35        self.0
36    }
37
38    /// Creates a new `ByteString` from a `&'static str`.
39    pub const fn from_static(src: &'static str) -> ByteString {
40        Self(Bytes::from_static(src.as_bytes()))
41    }
42
43    /// Creates a new `ByteString` from a Bytes.
44    ///
45    /// # Safety
46    /// This function is unsafe because it does not check the bytes passed to it are valid UTF-8.
47    /// If this constraint is violated, it may cause memory unsafety issues with future users of
48    /// the `ByteString`, as we assume that `ByteString`s are valid UTF-8. However, the most likely
49    /// issue is that the data gets corrupted.
50    pub const unsafe fn from_bytes_unchecked(src: Bytes) -> ByteString {
51        Self(src)
52    }
53
54    /// Divides one bytestring into two at an index, returning both parts.
55    ///
56    /// # Panics
57    ///
58    /// Panics if `mid` is not on a UTF-8 code point boundary, or if it is past the end of the last
59    /// code point of the bytestring.
60    pub fn split_at(&self, mid: usize) -> (ByteString, ByteString) {
61        let this: &str = self.as_ref();
62        let _valid_midpoint_check = this.split_at(mid);
63
64        let mut bytes = self.0.clone();
65        let first = bytes.split_to(mid);
66        let last = bytes;
67
68        unsafe {
69            (
70                ByteString::from_bytes_unchecked(first),
71                ByteString::from_bytes_unchecked(last),
72            )
73        }
74    }
75
76    /// Returns a new `ByteString` that is equivalent to the given `subset`.
77    ///
78    /// When processing a `ByteString` buffer with other tools, one often gets a `&str` which is in
79    /// fact a slice of the original `ByteString`; i.e., a subset of it. This function turns that
80    /// `&str` into another `ByteString`, as if one had sliced the `ByteString` with the offsets
81    /// that correspond to `subset`.
82    ///
83    /// Corresponds to [`Bytes::slice_ref`].
84    ///
85    /// This operation is `O(1)`.
86    ///
87    /// # Panics
88    ///
89    /// Panics if `subset` is not a sub-slice of this byte string.
90    ///
91    /// Note that strings which are only subsets from an equality perspective do not uphold this
92    /// requirement; see examples.
93    ///
94    /// # Examples
95    ///
96    /// ```
97    /// # use bytestring::ByteString;
98    /// let string = ByteString::from_static(" foo ");
99    /// let subset = string.trim();
100    /// let substring = string.slice_ref(subset);
101    /// assert_eq!(substring, "foo");
102    /// ```
103    ///
104    /// ```should_panic
105    /// # use bytestring::ByteString;
106    /// // panics because the given slice is not derived from the original byte string, despite
107    /// // being a logical subset of the string
108    /// ByteString::from_static("foo bar").slice_ref("foo");
109    /// ```
110    pub fn slice_ref(&self, subset: &str) -> Self {
111        Self(self.0.slice_ref(subset.as_bytes()))
112    }
113}
114
115impl PartialEq<str> for ByteString {
116    fn eq(&self, other: &str) -> bool {
117        &self[..] == other
118    }
119}
120
121impl<T: AsRef<str>> PartialEq<T> for ByteString {
122    fn eq(&self, other: &T) -> bool {
123        &self[..] == other.as_ref()
124    }
125}
126
127impl AsRef<ByteString> for ByteString {
128    fn as_ref(&self) -> &ByteString {
129        self
130    }
131}
132
133impl AsRef<[u8]> for ByteString {
134    fn as_ref(&self) -> &[u8] {
135        self.0.as_ref()
136    }
137}
138
139impl AsRef<str> for ByteString {
140    fn as_ref(&self) -> &str {
141        self
142    }
143}
144
145impl hash::Hash for ByteString {
146    fn hash<H: hash::Hasher>(&self, state: &mut H) {
147        (**self).hash(state);
148    }
149}
150
151impl ops::Deref for ByteString {
152    type Target = str;
153
154    #[inline]
155    fn deref(&self) -> &str {
156        let bytes = self.0.as_ref();
157        // SAFETY: UTF-8 validity is guaranteed during construction.
158        unsafe { str::from_utf8_unchecked(bytes) }
159    }
160}
161
162impl Borrow<str> for ByteString {
163    fn borrow(&self) -> &str {
164        self
165    }
166}
167
168impl From<String> for ByteString {
169    #[inline]
170    fn from(value: String) -> Self {
171        Self(Bytes::from(value))
172    }
173}
174
175impl From<&str> for ByteString {
176    #[inline]
177    fn from(value: &str) -> Self {
178        Self(Bytes::copy_from_slice(value.as_ref()))
179    }
180}
181
182impl From<Box<str>> for ByteString {
183    #[inline]
184    fn from(value: Box<str>) -> Self {
185        Self(Bytes::from(value.into_boxed_bytes()))
186    }
187}
188
189impl From<ByteString> for String {
190    #[inline]
191    fn from(value: ByteString) -> Self {
192        value.to_string()
193    }
194}
195
196impl TryFrom<&[u8]> for ByteString {
197    type Error = str::Utf8Error;
198
199    #[inline]
200    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
201        let _ = str::from_utf8(value)?;
202        Ok(ByteString(Bytes::copy_from_slice(value)))
203    }
204}
205
206impl TryFrom<Vec<u8>> for ByteString {
207    type Error = str::Utf8Error;
208
209    #[inline]
210    fn try_from(value: Vec<u8>) -> Result<Self, Self::Error> {
211        let buf = String::from_utf8(value).map_err(|err| err.utf8_error())?;
212        Ok(ByteString(Bytes::from(buf)))
213    }
214}
215
216impl TryFrom<Bytes> for ByteString {
217    type Error = str::Utf8Error;
218
219    #[inline]
220    fn try_from(value: Bytes) -> Result<Self, Self::Error> {
221        let _ = str::from_utf8(value.as_ref())?;
222        Ok(ByteString(value))
223    }
224}
225
226impl TryFrom<bytes::BytesMut> for ByteString {
227    type Error = str::Utf8Error;
228
229    #[inline]
230    fn try_from(value: bytes::BytesMut) -> Result<Self, Self::Error> {
231        let _ = str::from_utf8(&value)?;
232        Ok(ByteString(value.freeze()))
233    }
234}
235
236macro_rules! array_impls {
237    ($($len:expr)+) => {
238        $(
239            impl TryFrom<[u8; $len]> for ByteString {
240                type Error = str::Utf8Error;
241
242                #[inline]
243                fn try_from(value: [u8; $len]) -> Result<Self, Self::Error> {
244                    ByteString::try_from(&value[..])
245                }
246            }
247
248            impl TryFrom<&[u8; $len]> for ByteString {
249                type Error = str::Utf8Error;
250
251                #[inline]
252                fn try_from(value: &[u8; $len]) -> Result<Self, Self::Error> {
253                    ByteString::try_from(&value[..])
254                }
255            }
256        )+
257    }
258}
259
260array_impls!(0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32);
261
262impl fmt::Debug for ByteString {
263    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
264        (**self).fmt(fmt)
265    }
266}
267
268impl fmt::Display for ByteString {
269    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
270        (**self).fmt(fmt)
271    }
272}
273
/// Serde support for [`ByteString`], compiled only when the `serde` feature is enabled.
#[cfg(feature = "serde")]
mod serde {
    use alloc::string::String;

    use serde_core::{
        de::{Deserialize, Deserializer},
        ser::{Serialize, Serializer},
    };

    use super::ByteString;

    /// Serializes the contents as a plain string.
    impl Serialize for ByteString {
        #[inline]
        fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
            let text: &str = self;
            serializer.serialize_str(text)
        }
    }

    /// Deserializes an owned `String` and converts it into a `ByteString`.
    impl<'de> Deserialize<'de> for ByteString {
        #[inline]
        fn deserialize<D: Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
            let owned = String::deserialize(deserializer)?;
            Ok(ByteString::from(owned))
        }
    }

    #[cfg(test)]
    mod serde_impl_tests {
        use serde_core::de::DeserializeOwned;
        use static_assertions::assert_impl_all;

        use super::*;

        // Compile-time check that both serde traits are implemented.
        assert_impl_all!(ByteString: Serialize, DeserializeOwned);
    }
}
315
/// Unit tests covering construction, conversions, trait impls and slicing of `ByteString`.
#[cfg(test)]
mod test {
    use alloc::{borrow::ToOwned, format, vec};
    use core::{
        hash::{Hash, Hasher},
        panic::{RefUnwindSafe, UnwindSafe},
    };

    use ahash::AHasher;
    use static_assertions::assert_impl_all;

    use super::*;

    // Compile-time checks of the auto and derived traits.
    assert_impl_all!(ByteString: Send, Sync, Unpin, Sized);
    assert_impl_all!(ByteString: Clone, Default, Eq, PartialOrd, Ord);
    assert_impl_all!(ByteString: fmt::Debug, fmt::Display);
    assert_impl_all!(ByteString: UnwindSafe, RefUnwindSafe);

    #[test]
    fn eq() {
        let s: ByteString = ByteString::from_static("test");
        assert_eq!(s, "test");
        assert_eq!(s, *"test");
        assert_eq!(s, "test".to_owned());
    }

    #[test]
    fn new() {
        let _: ByteString = ByteString::new();
    }

    #[test]
    fn as_bytes() {
        let buf = ByteString::new();
        assert!(buf.as_bytes().is_empty());

        let buf = ByteString::from("hello");
        assert_eq!(buf.as_bytes(), "hello");
    }

    #[test]
    fn from_bytes_unchecked() {
        let buf = unsafe { ByteString::from_bytes_unchecked(Bytes::new()) };
        assert!(buf.is_empty());

        let buf = unsafe { ByteString::from_bytes_unchecked(Bytes::from("hello")) };
        assert_eq!(buf, "hello");
    }

    #[test]
    fn as_ref() {
        let buf = ByteString::new();

        let _: &ByteString = buf.as_ref();
        let _: &[u8] = buf.as_ref();
    }

    #[test]
    fn borrow() {
        let buf = ByteString::new();

        let _: &str = buf.borrow();
    }

    #[test]
    fn hash() {
        // A `ByteString` must hash identically to the equivalent `str`.
        let mut hasher1 = AHasher::default();
        "str".hash(&mut hasher1);

        let mut hasher2 = AHasher::default();
        let s = ByteString::from_static("str");
        s.hash(&mut hasher2);
        assert_eq!(hasher1.finish(), hasher2.finish());
    }

    #[test]
    fn from_string() {
        let s: ByteString = "hello".to_owned().into();
        assert_eq!(&s, "hello");
        let t: &str = s.as_ref();
        assert_eq!(t, "hello");
    }

    #[test]
    fn from_str() {
        let _: ByteString = "str".into();
        let _: ByteString = "str".to_owned().into_boxed_str().into();
    }

    #[test]
    fn to_string() {
        let buf = ByteString::from("foo");
        assert_eq!(String::from(buf), "foo");
    }

    #[test]
    fn from_static_str() {
        static _S: ByteString = ByteString::from_static("hello");
        let _ = ByteString::from_static("str");
    }

    #[test]
    fn try_from_slice() {
        // A byte-string literal is a `&[u8; N]`, so this goes through the array-reference impl.
        let _ = ByteString::try_from(b"nice bytes").unwrap();

        // Exercise the `&[u8]` impl with real slices, both valid and invalid UTF-8.
        let _ = ByteString::try_from(&b"nice bytes"[..]).unwrap();
        ByteString::try_from(&b"\xff\xfe"[..]).unwrap_err();
    }

    #[test]
    fn try_from_array() {
        assert_eq!(
            ByteString::try_from([b'h', b'i']).unwrap(),
            ByteString::from_static("hi")
        );
    }

    #[test]
    fn try_from_vec() {
        let _ = ByteString::try_from(vec![b'f', b'o', b'o']).unwrap();
        ByteString::try_from(vec![0, 159, 146, 150]).unwrap_err();
    }

    #[test]
    fn try_from_bytes() {
        let _ = ByteString::try_from(Bytes::from_static(b"nice bytes")).unwrap();
    }

    #[test]
    fn try_from_bytes_mut() {
        let _ = ByteString::try_from(bytes::BytesMut::from(&b"nice bytes"[..])).unwrap();
    }

    #[test]
    fn display() {
        let buf = ByteString::from("bar");
        assert_eq!(format!("{buf}"), "bar");
    }

    #[test]
    fn debug() {
        let buf = ByteString::from("baz");
        assert_eq!(format!("{buf:?}"), r#""baz""#);
    }

    #[cfg(feature = "serde")]
    #[test]
    fn deserialize() {
        let s: ByteString = serde_json::from_str(r#""nice bytes""#).unwrap();
        assert_eq!(s, "nice bytes");
    }

    #[cfg(feature = "serde")]
    #[test]
    fn serialize() {
        let s = serde_json::to_string(&ByteString::from_static("nice bytes")).unwrap();
        assert_eq!(s, r#""nice bytes""#);
    }

    #[test]
    fn slice_ref() {
        let string = ByteString::from_static(" foo ");
        let subset = string.trim();
        // subset is derived from original byte string
        let substring = string.slice_ref(subset);
        assert_eq!(substring, "foo");
    }

    #[test]
    #[should_panic]
    fn slice_ref_catches_not_a_subset() {
        // panics because the given slice is not derived from the original byte string, despite
        // being a logical subset of the string
        ByteString::from_static("foo bar").slice_ref("foo");
    }

    #[test]
    fn split_at() {
        let buf = ByteString::from_static("foo bar");

        let (first, last) = buf.split_at(0);
        assert_eq!(ByteString::from_static(""), first);
        assert_eq!(ByteString::from_static("foo bar"), last);

        let (first, last) = buf.split_at(4);
        assert_eq!(ByteString::from_static("foo "), first);
        assert_eq!(ByteString::from_static("bar"), last);

        let (first, last) = buf.split_at(7);
        assert_eq!(ByteString::from_static("foo bar"), first);
        assert_eq!(ByteString::from_static(""), last);
    }

    #[test]
    #[should_panic = "byte index 1 is not a char boundary;"]
    fn split_at_invalid_code_point() {
        // "µ" is encoded as two bytes in UTF-8, so index 1 falls inside the code point.
        ByteString::from_static("µ").split_at(1);
    }

    #[test]
    #[should_panic = "byte index 9 is out of bounds"]
    fn split_at_outside_string() {
        ByteString::from_static("foo").split_at(9);
    }
}
517}