Last active
August 3, 2021 01:24
-
-
Save eloff/6e0c236ae6388744a7d5b76a5e0e9b6c to your computer and use it in GitHub Desktop.
merge two adjacent Bytes objects into one
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Public Domain | |
use bytes; | |
struct BytesAlike { | |
data: *const u8, | |
len: usize, | |
_1: usize, | |
_2: usize, | |
} | |
/// unsplit_bytes checks if b2 follows directly after b1 in memory, and if so merges it | |
/// into b1 and returns b1. Otherwise returns b1 and b2 unmodified. | |
/// Safety: because of pointer provenance https://rust-lang.github.io/unsafe-code-guidelines/glossary.html#pointer-provenance | |
/// this may invoke undefined behavior if used to merge two Bytes not originally part of | |
/// the same allocation (e.g. not split from the same BytesMut or similar.) | |
pub unsafe fn unsplit_bytes(mut b1: Bytes, b2: Bytes) -> (Option<Bytes>, Option<Bytes>) { | |
if are_contiguous(&b1, &b2) { | |
assert_eq!(std::mem::size_of::<Bytes>(), std::mem::size_of::<BytesAlike>()); | |
// Safety: this is pretty unsafe, we assume the length is a usize stored after a pointer. | |
// So we check that both the pointer and length fields appear to be where expect them | |
// and if they're not, we simply treat them as if they cannot be merged. | |
let p = b1.as_ptr(); | |
let len = b1.len(); | |
let bytes_ref = unsafe { &mut *(&mut b1 as *mut Bytes as *mut BytesAlike) }; | |
if bytes_ref.data == p && bytes_ref.len == len { | |
bytes_ref.len += b2.len(); | |
return (Some(b1), None); | |
} | |
} | |
(Some(b1), Some(b2)) | |
} | |
/// Returns true if b2 immediately follows b1 in memory | |
fn are_contiguous(b1: &Bytes, b2: &Bytes) -> bool { | |
// Safety: we don't dereference end | |
let end = unsafe { b1.as_ptr().add(b1.len()) }; | |
end == b2.as_ptr() | |
} | |
#[cfg(test)] | |
mod tests { | |
use super::*; | |
use bytes::Buf; | |
#[test] | |
fn test_unsplit_bytes_success() { | |
let s = "foobar".as_bytes(); | |
let mut b1 = Bytes::from(s); | |
let b2 = b1.split_off(3); | |
assert!(are_contiguous(&b1, &b2)); | |
let (r1, r2) = unsafe { unsplit_bytes(b1, b2) }; | |
assert!(r1.is_some()); | |
assert!(r2.is_none()); | |
assert_eq!(r1.unwrap().chunk(), s); | |
} | |
#[test] | |
fn test_unsplit_bytes_fail() { | |
let foo = "foopad".as_bytes(); | |
let bar = "bar".as_bytes(); | |
let b1 = Bytes::from(&foo[..3]); | |
let b2 = Bytes::from(bar); | |
assert!(!are_contiguous(&b1, &b2)); | |
let (r1, r2) = unsafe { unsplit_bytes(b1, b2) }; | |
assert_eq!(r1.is_some(), r2.is_some()); | |
assert_eq!(r1.unwrap().chunk(), &foo[..3]); | |
assert_eq!(r2.unwrap().chunk(), bar); | |
} | |
} |
Hey, thank you for your feedback. Your review improved the code. Are you affiliated with miri at all?
Glad it could help! I'm not affiliated, but have tried to help on a few issues on miri's repo in the past. Noticed your issue earlier while checking byte
's repo (not affiliated with it either) and was curious what it was about :)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
sounds reasonable ^^