swc_atoms/
wtf8_atom.rs

1use std::{
2    borrow::Cow,
3    fmt::{self, Formatter},
4    ops::Deref,
5};
6
7use hstr::wtf8::{Wtf8, Wtf8Buf};
8use serde::Serializer;
9
10use crate::Atom;
11
12/// Clone-on-write WTF-8 string.
13///
14///
15/// See [tendril] for more details.
16#[derive(Clone, Default, PartialEq, Eq, Hash)]
17#[cfg_attr(feature = "rkyv-impl", derive(bytecheck::CheckBytes))]
18#[repr(transparent)]
19pub struct Wtf8Atom(pub(super) hstr::Wtf8Atom);
20
#[cfg(feature = "encoding-impl")]
impl cbor4ii::core::enc::Encode for Wtf8Atom {
    /// Writes the atom's raw bytes to `writer` as a CBOR byte string.
    ///
    /// # Errors
    ///
    /// Propagates any error reported while encoding the byte string.
    #[inline]
    fn encode<W: cbor4ii::core::enc::Write>(
        &self,
        writer: &mut W,
    ) -> Result<(), cbor4ii::core::enc::Error<W::Error>> {
        let payload = cbor4ii::core::types::Bytes(self.as_bytes());
        cbor4ii::core::enc::Encode::encode(&payload, writer)
    }
}
31
#[cfg(feature = "encoding-impl")]
impl<'de> cbor4ii::core::dec::Decode<'de> for Wtf8Atom {
    /// Reads a CBOR byte string from `reader` and builds a [`Wtf8Atom`] from
    /// its contents.
    ///
    /// # Errors
    ///
    /// Propagates any error reported while decoding the byte string.
    #[inline]
    fn decode<R: cbor4ii::core::dec::Read<'de>>(
        reader: &mut R,
    ) -> Result<Self, cbor4ii::core::dec::Error<R::Error>> {
        let decoded = <cbor4ii::core::types::Bytes<&[u8]>>::decode(reader)?;

        // FIXME: This is not sound. The decoded bytes are never checked for
        // WTF-8 well-formedness before being handed to an unchecked
        // constructor; maybe `Wtf8Buf` should make bytes operations safe.
        //
        // The atom is built directly from the borrowed slice, so no
        // intermediate `Vec<u8>` is allocated.
        Ok(unsafe { Self::from_bytes_unchecked(decoded.0) })
    }
}
46
#[cfg(feature = "arbitrary")]
#[cfg_attr(docsrs, doc(cfg(feature = "arbitrary")))]
impl<'a> arbitrary::Arbitrary<'a> for Wtf8Atom {
    /// Builds a non-empty [`Wtf8Atom`] from arbitrary bytes.
    ///
    /// # Errors
    ///
    /// Returns [`arbitrary::Error::NotEnoughData`] when the generated byte
    /// vector is empty, and propagates any error from generating the bytes.
    fn arbitrary(u: &mut arbitrary::Unstructured<'_>) -> arbitrary::Result<Self> {
        let raw: Vec<u8> = u.arbitrary()?;
        if raw.is_empty() {
            return Err(arbitrary::Error::NotEnoughData);
        }

        // NOTE(review): `raw` is arbitrary and is not validated as WTF-8
        // before the unchecked conversion below; confirm this is acceptable.
        let buf = unsafe { Wtf8Buf::from_bytes_unchecked(raw) };
        Ok(Self(hstr::Wtf8Atom::from(buf)))
    }
}
60
61fn _asserts() {
62    // let _static_assert_size_eq = std::mem::transmute::<Atom, [usize; 1]>;
63
64    fn _assert_send<T: Send>() {}
65    fn _assert_sync<T: Sync>() {}
66
67    _assert_sync::<Wtf8Atom>();
68    _assert_send::<Wtf8Atom>();
69}
70
71impl Wtf8Atom {
72    /// Creates a new [Wtf8Atom] from a string.
73    #[inline(always)]
74    pub fn new<S>(s: S) -> Self
75    where
76        hstr::Wtf8Atom: From<S>,
77    {
78        Wtf8Atom(hstr::Wtf8Atom::from(s))
79    }
80
81    pub fn as_wtf8(&self) -> &Wtf8 {
82        &self.0
83    }
84
85    pub fn as_atom(&self) -> Option<&Atom> {
86        if self.as_str().is_some() {
87            Some(unsafe { &*(self as *const Wtf8Atom as *const Atom) })
88        } else {
89            None
90        }
91    }
92
93    /// Returns the UTF-8 [`Atom`] representation, borrowing when possible.
94    pub fn to_atom_lossy(&self) -> Cow<'_, Atom> {
95        if let Some(atom) = self.as_atom() {
96            return Cow::Borrowed(atom);
97        }
98        Cow::Owned(Atom::new(self.to_string_lossy()))
99    }
100
101    /// Try to convert this to a UTF-8 [Atom].
102    ///
103    /// Returns [Atom] if the string is valid UTF-8, otherwise returns
104    /// the original [Wtf8Atom].
105    pub fn try_into_atom(self) -> Result<Atom, Wtf8Atom> {
106        self.0.try_into_atom().map(Atom).map_err(Wtf8Atom)
107    }
108
109    /// Creates a new [Wtf8Atom] from a byte slice.
110    ///
111    /// # Safety
112    ///
113    /// The caller must ensure that `bytes` is a well-formed WTF-8 byte
114    /// sequence.
115    ///
116    /// See [hstr::wtf8::Wtf8::from_bytes_unchecked] for more details.
117    pub unsafe fn from_bytes_unchecked(bytes: &[u8]) -> Self {
118        Wtf8Atom(hstr::Wtf8Atom::from(
119            hstr::wtf8::Wtf8::from_bytes_unchecked(bytes),
120        ))
121    }
122}
123
124impl Deref for Wtf8Atom {
125    type Target = Wtf8;
126
127    #[inline]
128    fn deref(&self) -> &Self::Target {
129        &self.0
130    }
131}
132
133impl fmt::Debug for Wtf8Atom {
134    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
135        fmt::Debug::fmt(&**self, f)
136    }
137}
138
139impl PartialOrd for Wtf8Atom {
140    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
141        Some(self.cmp(other))
142    }
143}
144
145impl Ord for Wtf8Atom {
146    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
147        self.as_wtf8().cmp(other.as_wtf8())
148    }
149}
150
151impl<T> PartialEq<T> for Wtf8Atom
152where
153    hstr::Wtf8Atom: PartialEq<T>,
154    T: ?Sized,
155{
156    fn eq(&self, other: &T) -> bool {
157        self.0.eq(other)
158    }
159}
160
161impl<T> From<T> for Wtf8Atom
162where
163    hstr::Wtf8Atom: From<T>,
164{
165    fn from(s: T) -> Self {
166        Wtf8Atom::new(s)
167    }
168}
169
170impl From<&Wtf8Atom> for Wtf8Buf {
171    fn from(s: &Wtf8Atom) -> Self {
172        // SAFETY: `Wtf8Atom` is guaranteed to be valid WTF-8 byte sequence.
173        unsafe { Wtf8Buf::from_bytes_unchecked(s.as_bytes().to_vec()) }
174    }
175}
176
177impl serde::ser::Serialize for Wtf8Atom {
178    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
179    where
180        S: Serializer,
181    {
182        self.0.serialize(serializer)
183    }
184}
185
186impl<'de> serde::de::Deserialize<'de> for Wtf8Atom {
187    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
188    where
189        D: serde::Deserializer<'de>,
190    {
191        hstr::Wtf8Atom::deserialize(deserializer).map(Wtf8Atom)
192    }
193}
194
/// NOT A PUBLIC API
///
/// Archives the atom as a plain byte vector.
#[cfg(feature = "rkyv-impl")]
impl rkyv::Archive for Wtf8Atom {
    type Archived = rkyv::vec::ArchivedVec<u8>;
    type Resolver = rkyv::vec::VecResolver;

    #[allow(clippy::unit_arg)]
    fn resolve(&self, resolver: Self::Resolver, out: rkyv::Place<Self::Archived>) {
        let bytes = self.as_bytes();
        rkyv::vec::ArchivedVec::<u8>::resolve_from_slice(bytes, resolver, out);
    }
}
206
/// NOT A PUBLIC API
///
/// Serializes the atom's bytes as an archived byte vector.
#[cfg(feature = "rkyv-impl")]
impl<S: rancor::Fallible + rkyv::ser::Writer + rkyv::ser::Allocator + ?Sized> rkyv::Serialize<S>
    for Wtf8Atom
where
    <S as rancor::Fallible>::Error: rancor::Source,
{
    fn serialize(&self, serializer: &mut S) -> Result<Self::Resolver, S::Error> {
        let bytes = self.as_bytes();
        rkyv::vec::ArchivedVec::<u8>::serialize_from_slice(bytes, serializer)
    }
}
218
/// NOT A PUBLIC API
///
/// Rebuilds a [`Wtf8Atom`] from the archived bytes; never returns an error.
#[cfg(feature = "rkyv-impl")]
impl<D> rkyv::Deserialize<Wtf8Atom, D> for rkyv::vec::ArchivedVec<u8>
where
    D: ?Sized + rancor::Fallible,
    <D as rancor::Fallible>::Error: rancor::Source,
{
    fn deserialize(&self, _: &mut D) -> Result<Wtf8Atom, <D as rancor::Fallible>::Error> {
        let archived_bytes = self.as_slice();
        // SAFETY: `ArchivedVec<u8>` is guaranteed to be serialized with `Wtf8Atom` byte
        // sequence.  `Wtf8Atom` byte sequence is identical to `Wtf8` byte sequence.
        let atom = unsafe { Wtf8Atom::from_bytes_unchecked(archived_bytes) };
        Ok(atom)
    }
}
234
/// No-op implementation: shrinking a [`Wtf8Atom`] does nothing.
#[cfg(feature = "shrink-to-fit")]
impl shrink_to_fit::ShrinkToFit for Wtf8Atom {
    #[inline(always)]
    fn shrink_to_fit(&mut self) {
        // Intentionally empty.
    }
}
241
#[cfg(test)]
mod tests {
    use super::*;

    /// Valid UTF-8 contents must come back as a borrow of the existing atom.
    #[test]
    fn to_atom_lossy_returns_borrowed_for_utf8() {
        let expected = Atom::from("swc");
        let wtf = Wtf8Atom::from(expected.clone());

        if let Cow::Borrowed(actual) = wtf.to_atom_lossy() {
            assert_eq!(actual, &expected);
        } else {
            panic!("expected a borrowed Atom for valid UTF-8 input");
        }
    }

    /// Contents that are not valid UTF-8 must produce an owned atom equal to
    /// the lossy string conversion.
    #[test]
    fn to_atom_lossy_returns_owned_for_invalid_utf8() {
        // 0xED 0xA0 0x80 is the generalized-UTF-8 encoding of the lone
        // surrogate U+D800, which is not valid UTF-8.
        let surrogate_bytes = vec![0xed, 0xa0, 0x80];
        let wtf = Wtf8Atom::new(unsafe { Wtf8Buf::from_bytes_unchecked(surrogate_bytes) });

        let expected = wtf.to_string_lossy();

        if let Cow::Owned(atom) = wtf.to_atom_lossy() {
            assert_eq!(atom.as_ref(), expected);
        } else {
            panic!("expected an owned Atom for invalid UTF-8 input");
        }
    }
}