Thanks for visiting codestin.com
Credit goes to docs.rs

Skip to main content

revision/implementations/
string.rs

1use core::str;
2
3use crate::{DeserializeRevisioned, Error, Revisioned, SerializeRevisioned};
4
5use super::vecs::serialize_bytes;
6
7impl SerializeRevisioned for String {
8	#[inline]
9	fn serialize_revisioned<W: std::io::Write>(&self, writer: &mut W) -> Result<(), Error> {
10		serialize_bytes(self.as_bytes(), writer)
11	}
12}
13
14impl DeserializeRevisioned for String {
15	/// Reads the length-prefixed byte payload in a single bulk `read_exact`
16	/// and validates it as UTF-8 in place, avoiding both the per-byte fallback
17	/// when `specialised-vectors` is disabled and the `Take::read_to_end`
18	/// overhead of the `Vec<u8>` specialised path.
19	#[inline]
20	fn deserialize_revisioned<R: std::io::Read>(reader: &mut R) -> Result<Self, Error> {
21		let len = usize::deserialize_revisioned(reader)?;
22		if len == 0 {
23			return Ok(String::new());
24		}
25		// Zero-initialise before handing the buffer to `read_exact`. Passing an
26		// uninitialised slice to `Read::read` is explicitly documented as UB,
27		// and constructing a `&mut [u8]` over uninitialised memory would itself
28		// be UB per `slice::from_raw_parts_mut`'s initialisation requirement.
29		// The memset is negligible compared to the pending I/O and matches the
30		// pattern used in `uuid.rs` and the numeric specialised Vec impls.
31		let mut buf = vec![0u8; len];
32		reader.read_exact(&mut buf).map_err(Error::Io)?;
33		String::from_utf8(buf).map_err(|x| Error::Utf8Error(x.utf8_error()))
34	}
35}
36
37impl Revisioned for String {
38	#[inline]
39	fn revision() -> u16 {
40		1
41	}
42}
43
44impl SerializeRevisioned for str {
45	#[inline]
46	fn serialize_revisioned<W: std::io::Write>(&self, writer: &mut W) -> Result<(), Error> {
47		serialize_bytes(self.as_bytes(), writer)
48	}
49}
50
51impl Revisioned for str {
52	#[inline]
53	fn revision() -> u16 {
54		1
55	}
56}
57
58impl SerializeRevisioned for char {
59	#[inline]
60	fn serialize_revisioned<W: std::io::Write>(&self, w: &mut W) -> Result<(), Error> {
61		let buffer = &mut [0u8; 4];
62		w.write_all(self.encode_utf8(buffer).as_bytes()).map_err(Error::Io)
63	}
64}
65
66impl DeserializeRevisioned for char {
67	#[inline]
68	fn deserialize_revisioned<R: std::io::Read>(r: &mut R) -> Result<Self, Error> {
69		let mut buffer = [0u8; 4];
70		r.read_exact(&mut buffer[..1]).map_err(Error::Io)?;
71
72		let len = CHAR_LENGTH[buffer[0] as usize];
73
74		if len == 0 {
75			return Err(Error::InvalidCharEncoding);
76		}
77
78		r.read_exact(&mut buffer[1..(len as usize)]).map_err(Error::Io)?;
79
80		str::from_utf8(&buffer[..(len as usize)])
81			.map_err(|_| Error::InvalidCharEncoding)
82			.map(|x| x.chars().next().unwrap())
83	}
84}
85
86impl Revisioned for char {
87	#[inline]
88	fn revision() -> u16 {
89		1
90	}
91}
92
/// Lookup table indexed by the first byte of a UTF-8 sequence, giving the
/// total number of bytes in that sequence (1 to 4).
///
/// An entry of 0 marks a byte whose high bits match none of the four
/// lead-byte patterns `0xxxxxxx`, `110xxxxx`, `1110xxxx` and `11110xxx`.
static CHAR_LENGTH: [u8; 256] = const {
	let mut table = [0u8; 256];
	let mut byte = 0usize;
	while byte < 256 {
		// `byte` is below 256 here, so the cast to `u8` is lossless. The count
		// of leading one bits identifies which lead-byte pattern matches.
		table[byte] = match (byte as u8).leading_ones() {
			0 => 1,
			2 => 2,
			3 => 3,
			4 => 4,
			_ => 0,
		};
		byte += 1;
	}
	table
};
112
#[cfg(test)]
mod tests {

	use super::*;

	use crate::implementations::assert_bincode_compat;

	/// A `String` survives a serialise/deserialise round trip and has the
	/// expected encoded size.
	#[test]
	fn test_string() {
		let original = String::from("this is a test");
		let mut encoded: Vec<u8> = Vec::new();
		original.serialize_revisioned(&mut encoded).unwrap();

		// The payload itself is 14 bytes; the remainder is the length prefix.
		#[cfg(not(feature = "fixed-width-encoding"))]
		assert_eq!(encoded.len(), 15);
		#[cfg(feature = "fixed-width-encoding")]
		assert_eq!(encoded.len(), 22);

		let mut input = encoded.as_slice();
		let decoded = <String as DeserializeRevisioned>::deserialize_revisioned(&mut input).unwrap();
		assert_eq!(original, decoded);
	}

	/// A four-byte character survives a serialise/deserialise round trip.
	#[test]
	fn test_char() {
		let sample = '𐃌';
		let mut encoded = Vec::new();
		sample.serialize_revisioned(&mut encoded).unwrap();

		let mut input = encoded.as_slice();
		let decoded = <char as DeserializeRevisioned>::deserialize_revisioned(&mut input).unwrap();
		assert_eq!(sample, decoded);
	}

	/// Runs `assert_bincode_compat` over characters from every UTF-8 width.
	#[test]
	fn bincode_compat_char() {
		let samples = [
			char::MAX,
			'\0',
			'z',
			'0',
			// in the 0x7F - 0x07FF range
			'ʘ',
			// in the 0x7FF - 0xFFFF range
			'ꚸ',
			// in the 0xFFFF - 0x10FFFF range
			'𐃌',
		];
		for sample in &samples {
			assert_bincode_compat(sample);
		}
	}

	/// `str` and `String` must produce byte-for-byte identical output so that
	/// a `String` can round-trip values originally serialised from a `&str`
	/// (and vice versa).
	#[test]
	fn str_and_string_serialize_identically() {
		// Covers empty, ASCII, multi-byte UTF-8, embedded NULs, and a longer
		// payload that exercises the length-prefix encoding beyond a single
		// byte.
		let long_input = "x".repeat(300);
		let cases = [
			"",
			"a",
			"this is a test",
			"unicode: 🚀🔥✨",
			"with\0embedded\0nuls",
			long_input.as_str(),
		];

		for &s in &cases {
			let mut from_str: Vec<u8> = Vec::new();
			<str as SerializeRevisioned>::serialize_revisioned(s, &mut from_str).unwrap();

			let owned = s.to_owned();
			let mut from_string: Vec<u8> = Vec::new();
			owned.serialize_revisioned(&mut from_string).unwrap();

			assert_eq!(
				from_str, from_string,
				"str and String serialisation diverged for input {:?}",
				s
			);

			let mut input = from_str.as_slice();
			let decoded =
				<String as DeserializeRevisioned>::deserialize_revisioned(&mut input).unwrap();
			assert_eq!(decoded, owned);
		}
	}

	/// Runs `assert_bincode_compat` over one-character strings from every
	/// UTF-8 width.
	#[test]
	fn bincode_compat_string() {
		let samples = [
			char::MAX,
			'\0',
			'z',
			'0',
			// in the 0x7F - 0x07FF range
			'ʘ',
			// in the 0x7FF - 0xFFFF range
			'ꚸ',
			// in the 0xFFFF - 0x10FFFF range
			'𐃌',
		];
		for sample in &samples {
			assert_bincode_compat(&sample.to_string());
		}
	}
}