Skip to content

Commit 386a684

Browse files
committed
Refactor BytesWrapper away
Bytes itself is already a structure with a slice into some underlying bytes. Thus, we are adding a cheap way to directly modify the slice of the Bytes struct. To facilitate this, we now store the tuple (ptr, len) as a &[] slice within the Bytes struct. The representation of these is exactly identical to before, but it allows trivial manipulation of the slice via direct assignment or &mut &[u8] reference. The way the underlying field was exposed on the BytesWrapper struct was also unsound. Access to it was not wrapped within an unsafe method. The decoder code can now trivially clone() the given Bytes struct and carry it around, directly shrinking the slice size of the underlying Bytes instance as the data is being processed. Additionally, there now exist two helper methods for numbers and strings (read_string_bytes and read_number_bytes) which handle a bit of the boilerplate. Sadly it is necessary to always write "unsafe { buf.as_mut_slice() }", but this is necessary for soundness, we only can actually guarantee safety when calling the actual decode functions, as long as we want to carry Bytes around. Signed-off-by: Bob Weinand <[email protected]>
1 parent 1de65da commit 386a684

13 files changed

Lines changed: 173 additions & 264 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tinybytes/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ pretty_assertions = "1.3"
1515
proptest = {version = "1.5", features = ["std"], default-features = false}
1616
test-case = "2.2"
1717
serde_json = "1.0.127"
18+
tinybytes = { path = ".", features = ["bytes_string", "serialization"] }
1819

1920
[dependencies]
2021
serde = { version = "1.0.209", optional = true }

tinybytes/src/bytes_string.rs

Lines changed: 31 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -2,91 +2,26 @@
22
// SPDX-License-Identifier: Apache-2.0
33

44
use crate::Bytes;
5-
#[cfg(all(feature = "bytes_string", feature = "serde"))]
5+
#[cfg(feature = "serde")]
66
use serde::ser::{Serialize, Serializer};
77
use std::borrow::Borrow;
88
use std::str::Utf8Error;
99

10-
#[cfg(feature = "bytes_string")]
11-
pub struct BufferWrapper<'a, 'b> {
12-
buffer: Bytes,
13-
pub underlying: &'a mut &'b [u8],
14-
}
15-
16-
#[cfg(feature = "bytes_string")]
17-
impl<'a, 'b> BufferWrapper<'a, 'b> {
18-
/// Creates a new `BufferWrapper` from a `tinybytes::Bytes` instance.
19-
///
20-
/// # Arguments
21-
///
22-
/// * `buffer` - A `tinybytes::Bytes` instance to be wrapped.
23-
///
24-
/// # Returns
25-
///
26-
/// A new `BufferWrapper` instance containing the provided buffer.
27-
pub fn new(buffer: Bytes, underlying: &'a mut &'b [u8]) -> Self {
28-
BufferWrapper { buffer, underlying }
29-
}
30-
31-
/// Creates a `BytesString` from a slice of bytes within the wrapped buffer.
32-
///
33-
/// This function validates that the provided slice is valid UTF-8. If the slice is not valid
34-
/// UTF-8, an error is returned.
35-
///
36-
/// # Arguments
37-
///
38-
/// * `slice` - A byte slice that will be converted into a `BytesString`.
39-
///
40-
/// # Returns
41-
///
42-
/// A `Result` containing the `BytesString` if the slice is valid UTF-8, or a `Utf8Error` if
43-
/// the slice is not valid UTF-8.
44-
///
45-
/// # Errors
46-
///
47-
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
48-
pub fn create_bytes_string(&self, slice: &[u8]) -> Result<BytesString, std::str::Utf8Error> {
49-
BytesString::from_bytes(self.buffer.slice_ref(slice).expect("Invalid slice"))
50-
}
51-
52-
/// Creates a `BytesString` from a slice of bytes within the wrapped buffer without validating
53-
/// the bytes.
54-
///
55-
/// This function does not perform any validation on the provided bytes, and assumes that the
56-
/// bytes are valid UTF-8. If the bytes are not valid UTF-8, the behavior is undefined.
57-
///
58-
/// # Arguments
59-
///
60-
/// * `slice` - A byte slice that will be converted into a `BytesString`.
61-
///
62-
/// # Safety
63-
///
64-
/// This function is unsafe because it assumes the bytes are valid UTF-8. If the bytes are not
65-
/// valid UTF-8, the behavior is undefined.
66-
pub unsafe fn create_bytes_string_unchecked(&self, slice: &[u8]) -> BytesString {
67-
BytesString::from_bytes_unchecked(self.buffer.slice_ref(slice).expect("Invalid slice"))
68-
}
69-
}
70-
71-
#[cfg(feature = "bytes_string")]
7210
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
7311
pub struct BytesString {
7412
bytes: Bytes,
7513
}
7614

77-
#[cfg(all(feature = "bytes_string", feature = "serde"))]
15+
#[cfg(feature = "serde")]
7816
impl Serialize for BytesString {
7917
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
8018
where
8119
S: Serializer,
8220
{
83-
// This should be safe because we have already validated that the bytes are valid UTF-8 when
84-
// creating the BytesString.
85-
unsafe { serializer.serialize_str(self.as_str_unchecked()) }
21+
serializer.serialize_str(self.as_str())
8622
}
8723
}
8824

89-
#[cfg(feature = "bytes_string")]
9025
impl BytesString {
9126
/// Creates a `BytesString` from a slice of bytes.
9227
///
@@ -105,7 +40,7 @@ impl BytesString {
10540
/// # Errors
10641
///
10742
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
108-
pub fn from_slice(slice: &[u8]) -> Result<BytesString, std::str::Utf8Error> {
43+
pub fn from_slice(slice: &[u8]) -> Result<BytesString, Utf8Error> {
10944
std::str::from_utf8(slice)?;
11045
Ok(BytesString {
11146
bytes: Bytes::copy_from_slice(slice),
@@ -129,11 +64,26 @@ impl BytesString {
12964
/// # Errors
13065
///
13166
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
132-
pub fn from_bytes(bytes: Bytes) -> Result<BytesString, std::str::Utf8Error> {
67+
pub fn from_bytes(bytes: Bytes) -> Result<BytesString, Utf8Error> {
13368
std::str::from_utf8(&bytes)?;
13469
Ok(BytesString { bytes })
13570
}
13671

72+
/// Creates a `BytesString` from a string slice within the given buffer.
73+
///
74+
/// # Arguments
75+
///
76+
/// * `bytes` - A `tinybytes::Bytes` instance that will be converted into a `BytesString`.
77+
/// * `slice` - The string slice pointing into the given bytes that will form the `BytesString`.
78+
pub fn from_bytes_slice(bytes: &Bytes, slice: &str) -> BytesString {
79+
// SAFETY: This is safe as a str slice is definitely a valid UTF-8 slice.
80+
unsafe {
81+
BytesString::from_bytes_unchecked(
82+
bytes.slice_ref(slice.as_bytes()).expect("Invalid slice"),
83+
)
84+
}
85+
}
86+
13787
/// Creates a `BytesString` from a `tinybytes::Bytes` instance without validating the bytes.
13888
///
13989
/// This function does not perform any validation on the provided bytes, and assumes that the
@@ -147,37 +97,20 @@ impl BytesString {
14797
///
14898
/// This function is unsafe because it assumes the bytes are valid UTF-8. If the bytes are not
14999
/// valid UTF-8, the behavior is undefined.
150-
pub fn from_bytes_unchecked(bytes: Bytes) -> BytesString {
100+
pub unsafe fn from_bytes_unchecked(bytes: Bytes) -> BytesString {
151101
BytesString { bytes }
152102
}
153103

154-
/// Returns the string slice representation of the `BytesString`. The slice is checked to be
155-
/// valid UTF-8. If you use `from_bytes` or `from_slice` this check was already performed and
156-
/// you may want to use `as_str_unchecked` instead.
157-
///
158-
/// # Errors
159-
///
160-
/// Returns a `Utf8Error` if the bytes are not valid UTF-8.
161-
pub fn as_str(&self) -> Result<&str, Utf8Error> {
162-
std::str::from_utf8(&self.bytes)
163-
}
164-
165-
/// Returns the string slice representation of the `BytesString` without validating the bytes.
104+
/// Returns the string slice representation of the `BytesString` (without validating the bytes).
166105
/// Typically, you should use `from_slice` or `from_bytes` when creating a BytesString to
167106
/// ensure the bytes are valid UTF-8 (if the bytes haven't already been validated by other
168107
/// means) so further validation may be unnecessary.
169-
///
170-
/// # Safety
171-
///
172-
/// This function is unsafe because it assumes the bytes are valid UTF-8. If the bytes are not
173-
/// valid UTF-8, the behavior is undefined.
174-
pub unsafe fn as_str_unchecked(&self) -> &str {
175-
// SAFETY: This is unsafe and assumes the bytes are valid UTF-8.
108+
pub fn as_str(&self) -> &str {
109+
// SAFETY: We assume all BytesStrings are valid UTF-8.
176110
unsafe { std::str::from_utf8_unchecked(&self.bytes) }
177111
}
178112
}
179113

180-
#[cfg(feature = "bytes_string")]
181114
impl Default for BytesString {
182115
fn default() -> Self {
183116
BytesString {
@@ -186,25 +119,21 @@ impl Default for BytesString {
186119
}
187120
}
188121

189-
#[cfg(feature = "bytes_string")]
190122
impl Borrow<str> for BytesString {
191123
fn borrow(&self) -> &str {
192-
// This is safe because we have already validated that the bytes are valid UTF-8 when
193-
// creating the BytesString.
194-
unsafe { self.as_str_unchecked() }
124+
self.as_str()
195125
}
196126
}
197127

198128
#[cfg(test)]
199129
mod tests {
200130
use super::*;
201-
use serde_json;
202131

203132
#[test]
204133
fn test_from_slice() {
205134
let slice = b"hello";
206135
let bytes_string = BytesString::from_slice(slice).unwrap();
207-
assert_eq!(bytes_string.as_str().unwrap(), "hello");
136+
assert_eq!(bytes_string.as_str(), "hello");
208137
}
209138

210139
#[test]
@@ -218,7 +147,7 @@ mod tests {
218147
fn test_from_bytes() {
219148
let bytes = Bytes::copy_from_slice(b"world");
220149
let bytes_string = BytesString::from_bytes(bytes).unwrap();
221-
assert_eq!(bytes_string.as_str().unwrap(), "world");
150+
assert_eq!(bytes_string.as_str(), "world");
222151
}
223152

224153
#[test]
@@ -231,14 +160,14 @@ mod tests {
231160
#[test]
232161
fn test_from_bytes_unchecked() {
233162
let bytes = Bytes::copy_from_slice(b"unchecked");
234-
let bytes_string = BytesString::from_bytes_unchecked(bytes);
235-
assert_eq!(bytes_string.as_str().unwrap(), "unchecked");
163+
let bytes_string = unsafe { BytesString::from_bytes_unchecked(bytes) };
164+
assert_eq!(bytes_string.as_str(), "unchecked");
236165
}
237166

238167
#[test]
239168
fn test_as_str() {
240169
let bytes_string = BytesString::from_slice(b"test").unwrap();
241-
assert_eq!(bytes_string.as_str().unwrap(), "test");
170+
assert_eq!(bytes_string.as_str(), "test");
242171
}
243172

244173
#[test]
@@ -251,7 +180,7 @@ mod tests {
251180
#[test]
252181
fn test_default() {
253182
let bytes_string: BytesString = Default::default();
254-
assert_eq!(bytes_string.as_str().unwrap(), "");
183+
assert_eq!(bytes_string.as_str(), "");
255184
}
256185

257186
#[test]

tinybytes/src/lib.rs

Lines changed: 28 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,7 @@ use std::{
1010
/// Immutable bytes type with zero copy cloning and slicing.
1111
#[derive(Clone)]
1212
pub struct Bytes {
13-
ptr: *const u8,
14-
len: usize,
13+
slice: &'static [u8],
1514
// The `bytes`` field is used to ensure that the underlying bytes are freed when there are no
1615
// more references to the `Bytes` object. For static buffers the field is `None`.
1716
bytes: Option<Arc<dyn UnderlyingBytes>>,
@@ -36,11 +35,7 @@ impl Bytes {
3635
#[inline]
3736
pub fn from_static(value: &'static [u8]) -> Self {
3837
let slice: &[u8] = value;
39-
Self {
40-
ptr: slice.as_ptr(),
41-
len: slice.len(),
42-
bytes: None,
43-
}
38+
Self { slice, bytes: None }
4439
}
4540

4641
/// Creates `Bytes` from a slice, by copying.
@@ -51,13 +46,13 @@ impl Bytes {
5146
/// Returns the length of the `Bytes`.
5247
#[inline]
5348
pub const fn len(&self) -> usize {
54-
self.len
49+
self.slice.len()
5550
}
5651

5752
/// Returns `true` if the `Bytes` is empty.
5853
#[inline]
5954
pub const fn is_empty(&self) -> bool {
60-
self.len == 0
55+
self.slice.is_empty()
6156
}
6257

6358
/// Returns a slice of self for the provided range.
@@ -154,38 +149,49 @@ impl Bytes {
154149

155150
let subset_start = subset.as_ptr() as usize;
156151
let subset_end = subset_start + subset.len();
157-
let self_start = self.ptr as usize;
158-
let self_end = self_start + self.len;
152+
let self_start = self.slice.as_ptr() as usize;
153+
let self_end = self_start + self.slice.len();
159154
if subset_start >= self_start && subset_end <= self_end {
160155
Some(self.safe_slice_ref(subset_start - self_start, subset_end - self_start))
161156
} else {
162157
None
163158
}
164159
}
165160

161+
/// Returns a mutable reference to the slice of self.
162+
/// Allows for fast unchecked shrinking of the slice.
163+
///
164+
/// # Safety
165+
///
166+
/// Callers of that function must make sure that they only put subslices of the slice into the
167+
/// returned reference.
168+
/// They also need to make sure to not persist the slice reference for longer than the struct
169+
/// lives.
170+
#[inline]
171+
pub unsafe fn as_mut_slice(&mut self) -> &mut &'static [u8] {
172+
&mut self.slice
173+
}
174+
166175
// private
167176

168177
fn from_underlying(value: impl UnderlyingBytes) -> Self {
169-
let slice: &[u8] = value.as_ref();
170178
Self {
171-
ptr: slice.as_ptr(),
172-
len: slice.len(),
179+
slice: unsafe { std::mem::transmute::<&'_ [u8], &'static [u8]>(value.as_ref()) },
173180
bytes: Some(Arc::new(value)),
174181
}
175182
}
176183

177184
#[inline]
178185
fn safe_slice_ref(&self, start: usize, end: usize) -> Self {
179186
Self {
180-
ptr: unsafe { self.ptr.add(start) },
181-
len: end - start,
187+
slice: &self.slice[start..end],
182188
bytes: self.bytes.clone(),
183189
}
184190
}
185191

186192
#[inline]
187193
fn as_slice(&self) -> &[u8] {
188-
unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
194+
self.slice
189195
}
190196
}
191197

@@ -264,6 +270,10 @@ impl fmt::Debug for Bytes {
264270
}
265271
}
266272

267-
pub mod bytes_string;
273+
#[cfg(feature = "bytes_string")]
274+
mod bytes_string;
275+
#[cfg(feature = "bytes_string")]
276+
pub use bytes_string::BytesString;
277+
268278
#[cfg(test)]
269279
mod test;

trace-utils/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ criterion = "0.5.1"
4848
httpmock = { version = "0.7.0"}
4949
serde_json = "1.0"
5050
tokio = { version = "1", features = ["macros", "rt-multi-thread"] }
51+
datadog-trace-utils = { path = ".", features = ["test-utils"] }
5152

5253
[features]
5354
test-utils = ["httpmock", "testcontainers", "cargo_metadata", "cargo-platform"]

0 commit comments

Comments
 (0)