Created
February 1, 2021 19:24
-
-
Save ear7h/ca840aad1ad239db91c80b083264e0ef to your computer and use it in GitHub Desktop.
Ring buffer of utf8 strings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Fixed-capacity ring buffer that retains the most recent UTF-8 text pushed
/// into it.
///
/// Invariants upheld by every method:
/// * `ptr` points to `cap` initialized bytes that came from a `Box<[u8]>`;
/// * `start < cap` (or `start == 0` when `cap == 0`) and `len <= cap`;
/// * the `len` bytes beginning at `start` (wrapping around at `cap`) form
///   valid UTF-8, i.e. the content never begins or ends inside a character.
struct RingBuf {
    ptr: *mut u8,
    start: usize,
    len: usize,
    cap: usize,
}

impl RingBuf {
    /// Creates an empty buffer able to hold exactly `cap` bytes.
    ///
    /// The storage is zero-filled so that every byte is initialized; this is
    /// what makes it sound to view the whole allocation as a `&[u8]`.
    fn new(cap: usize) -> RingBuf {
        let storage: Box<[u8]> = vec![0u8; cap].into_boxed_slice();
        RingBuf {
            ptr: Box::into_raw(storage) as *mut u8,
            start: 0,
            len: 0,
            cap,
        }
    }

    /// Whole backing allocation (not just the live content) as a slice.
    fn storage(&self) -> &[u8] {
        // SAFETY: `ptr` originates from a `Box<[u8]>` of length `cap` whose
        // bytes were all initialized in `new`, and it is only released in
        // `drop`. For `cap == 0` the pointer is dangling but non-null and
        // aligned, which is valid for a zero-length slice.
        unsafe { std::slice::from_raw_parts(self.ptr, self.cap) }
    }

    /// Mutable view of the whole backing allocation.
    fn storage_mut(&mut self) -> &mut [u8] {
        // SAFETY: same as `storage`; `&mut self` guarantees exclusive access.
        unsafe { std::slice::from_raw_parts_mut(self.ptr, self.cap) }
    }

    /// Returns the content as two byte slices in logical order: the part from
    /// `start` up to the end of the allocation, then the wrapped-around part
    /// at the beginning of the allocation (empty when nothing wraps).
    fn chunks(&self) -> (&[u8], &[u8]) {
        let buf = self.storage();
        let first_len = (self.cap - self.start).min(self.len);
        (
            &buf[self.start..self.start + first_len],
            &buf[..self.len - first_len],
        )
    }

    /// Writes the whole content to `w`, consuming the buffer.
    ///
    /// Uses `write_all` so that short writes cannot silently drop data.
    ///
    /// # Errors
    /// Returns the first I/O error reported by `w`.
    fn write_to<W: std::io::Write>(self, mut w: W) -> std::io::Result<()> {
        let (left, right) = self.chunks();
        w.write_all(left)?;
        w.write_all(right)
    }

    /// Returns the content as `(left, middle, right)` where `middle` is the
    /// single character whose bytes straddle the wrap-around point, if any.
    ///
    /// # Panics
    /// Panics if the stored bytes are not valid UTF-8, which can only happen
    /// if the struct's invariants were broken from outside these methods.
    fn unicode_chunks(&self) -> (&str, Option<char>, &str) {
        fn as_text<'a>(bytes: &'a [u8], side: &str) -> &'a str {
            std::str::from_utf8(bytes)
                .unwrap_or_else(|_| panic!("invalid utf-8 in {} chunk!!", side))
        }

        let (head, tail) = self.chunks();

        // Continuation bytes at the front of `tail` belong to a character
        // that started at the end of `head`.
        let trailing = tail.iter().take_while(|&&b| !is_char_boundary(b)).count();
        if trailing == 0 {
            return (as_text(head, "left"), None, as_text(tail, "right"));
        }

        // The straddling character begins at the last leading byte of `head`.
        let lead = head
            .iter()
            .rposition(|&b| is_char_boundary(b))
            .unwrap_or_else(|| panic!("invalid utf-8 in left chunk!!"));
        let (left, lead_bytes) = head.split_at(lead);
        let (cont_bytes, right) = tail.split_at(trailing);

        let total = lead_bytes.len() + cont_bytes.len();
        if total > 4 {
            panic!("invalid utf-8 in right chunk!!");
        }
        let mut scratch = [0u8; 4];
        scratch[..lead_bytes.len()].copy_from_slice(lead_bytes);
        scratch[lead_bytes.len()..total].copy_from_slice(cont_bytes);
        let straddling = std::str::from_utf8(&scratch[..total])
            .ok()
            .and_then(|s| s.chars().next())
            .unwrap_or_else(|| panic!("invalid utf-8 in right chunk!!"));

        (
            as_text(left, "left"),
            Some(straddling),
            as_text(right, "right"),
        )
    }

    /// Index in the allocation where the next pushed byte will be stored.
    /// Returns 0 for a zero-capacity buffer instead of dividing by zero.
    fn next_idx(&self) -> usize {
        if self.cap == 0 {
            0
        } else {
            (self.start + self.len) % self.cap
        }
    }

    /// Appends a single character; see [`RingBuf::push_str`] for eviction rules.
    fn push_char(&mut self, c: char) {
        let mut scratch = [0u8; 4];
        self.push_str(c.encode_utf8(&mut scratch));
    }

    /// Appends `s`, evicting the oldest bytes when the capacity is exceeded.
    ///
    /// Eviction always rounds up to a whole character, so the content stays
    /// valid UTF-8. When `s` is at least as long as the capacity, all previous
    /// content is evicted and only the longest suffix of `s` that fits and
    /// starts on a character boundary is kept (possibly nothing at all).
    fn push_str(&mut self, s: &str) {
        if s.is_empty() || self.cap == 0 {
            return;
        }
        let cap = self.cap;

        if s.len() >= cap {
            // Nothing of the old content can survive: restart at offset 0
            // with the tail of `s`, trimmed forward to a character boundary.
            let mut skip = s.len() - cap;
            while !s.is_char_boundary(skip) {
                skip += 1;
            }
            let tail = &s.as_bytes()[skip..];
            self.storage_mut()[..tail.len()].copy_from_slice(tail);
            self.start = 0;
            self.len = tail.len();
            return;
        }

        // `s` is shorter than the capacity: write at the logical end,
        // wrapping to the front of the allocation if needed.
        let bytes = s.as_bytes();
        let end = self.next_idx();
        let first = (cap - end).min(bytes.len());
        let (before_wrap, after_wrap) = bytes.split_at(first);
        let storage = self.storage_mut();
        storage[end..end + first].copy_from_slice(before_wrap);
        storage[..after_wrap.len()].copy_from_slice(after_wrap);

        let total = self.len + bytes.len();
        if total <= cap {
            self.len = total;
            return;
        }

        // The oldest `total - cap` bytes were overwritten; advance `start`
        // past them and then past any orphaned continuation bytes so the
        // content begins on a character boundary again.
        self.start = (self.start + (total - cap)) % cap;
        self.len = cap;
        while self.len > 0 && !is_char_boundary(self.storage()[self.start]) {
            self.start = (self.start + 1) % cap;
            self.len -= 1;
        }
    }
}

impl Drop for RingBuf {
    fn drop(&mut self) {
        // SAFETY: `ptr`/`cap` describe the `Box<[u8]>` leaked in `new`, and
        // `drop` runs at most once, so ownership is reclaimed exactly once.
        unsafe {
            drop(Box::from_raw(std::ptr::slice_from_raw_parts_mut(
                self.ptr, self.cap,
            )));
        }
    }
}

/// Diagnostic view: dumps the raw backing storage (including bytes that are
/// not part of the live content) alongside the bookkeeping fields.
impl std::fmt::Debug for RingBuf {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("RingBuf")
            .field("ptr", &self.storage())
            .field("start", &self.start)
            .field("len", &self.len)
            .field("cap", &self.cap)
            .finish()
    }
}

/// User-facing view: the buffered text in logical (oldest to newest) order.
impl std::fmt::Display for RingBuf {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (left, middle, right) = self.unicode_chunks();
        f.write_str(left)?;
        if let Some(c) = middle {
            write!(f, "{}", c)?;
        }
        f.write_str(right)
    }
}

// taken from stdlib https://doc.rust-lang.org/src/core/str/mod.rs.html#193-205
/// Returns `true` when `b` can start a UTF-8 character, i.e. it is not a
/// continuation byte of the form `0b10xx_xxxx`.
fn is_char_boundary(b: u8) -> bool {
    // Reinterpreted as i8, continuation bytes are exactly -128..=-65.
    (b as i8) >= -0x40
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment