Skip to content

Instantly share code, notes, and snippets.

@ear7h
Created February 1, 2021 19:24
Show Gist options
  • Save ear7h/ca840aad1ad239db91c80b083264e0ef to your computer and use it in GitHub Desktop.
Save ear7h/ca840aad1ad239db91c80b083264e0ef to your computer and use it in GitHub Desktop.
Ring buffer of utf8 strings
/// Ring buffer of UTF-8 strings.
///
/// The live contents are the `len` bytes that begin `start` bytes past `ptr`
/// and wrap around to `ptr` once the end of the `cap`-byte storage is reached.
struct RingBuf {
    /// Base pointer of the backing byte storage.
    ptr: *mut u8,
    /// Offset from `ptr` of the first live byte.
    start: usize,
    /// Number of live bytes currently stored.
    len: usize,
    /// Size in bytes of the storage behind `ptr`.
    cap: usize,
}
impl RingBuf {
fn new(cap : usize) -> RingBuf {
let (ptr, len, cap) = Vec::with_capacity(cap).into_raw_parts();
RingBuf {
ptr : ptr,
start : 0,
len : len,
cap : cap,
}
}
fn chunks(&self) -> (&[u8], &[u8]) {
unsafe {
(
std::slice::from_raw_parts(
self.ptr.offset(self.start as isize),
(self.cap - self.start).min(self.len),
),
std::slice::from_raw_parts(
self.ptr,
self.len - (self.cap - self.start).min(self.len),
),
)
}
}
fn write_to<W : Write>(self, w : W) -> Result<()> {
let (l, r) = self.chunks();
w.write(l)?;
w.write(r)
}
fn unicode_chunks(&self) -> (&str, Option<char>, &str) {
let s2s = |x| {
unsafe { std::str::from_utf8_unchecked(x) }
};
let (c1, c2) = self.chunks();
if c2.len() == 0 || is_char_boundary(c2[0]) {
// c1.len() == 0 is covered because it is implied by c2.len() == 0
return (s2s(c1), None, s2s(c2));
} else {
// find a char boundary from the right on the left chunk
let mut left = 0;
while !is_char_boundary(c1[c1.len() - left - 1]) {
if left > 4 {
panic!("invalid utf-8 in left chunk!!");
}
left += 1;
}
let mut right = 0;
while !is_char_boundary(c2[right]) {
if left + right > 4 {
panic!("invalid utf-8 in right chunk!!");
}
right += 1;
}
let (l, lm) = c1.split_at(c1.len() - left - 1);
let (rm, r) = c2.split_at(right);
let mut buf : [u8; 4] = [0, 0, 0, 0];
for (idx, b) in lm.iter().chain(rm).enumerate() {
buf[idx] = *b;
}
let c = unsafe {
std::str::from_utf8_unchecked(&buf[..=(left+right)])
}.chars().next().unwrap();
return (s2s(l), Some(c), s2s(r));
}
}
fn next_idx(&self) -> usize {
(self.start + self.len) % self.cap
}
fn push_char(&mut self, c : char) {
let mut b : [u8; 4] = [0, 0, 0, 0];
let s = c.encode_utf8(&mut b);
self.push_slice(s.as_slice());
}
fn push_str(&mut self, s : &str) {
if s.len() == 0 {
return;
}
let mut start = if s.len() > self.cap {
s.len() - self.cap
} else {
0
};
while !s.is_char_boundary(start) {
start += 1;
if self.cap < 4 && start >= s.len() {
// if we keep incrementing start
return;
}
}
let s = unsafe { s.get_unchecked(start..) };
// copying aligned to the beginning of the allocation
if s.len() == self.cap || (self.start + self.len) == 0 {
unsafe {
self.ptr.copy_from(s.as_ptr(), s.len());
}
self.start = 0;
self.len = (self.len + s.len()).min(self.cap);
return;
}
let next_idx = self.next_idx();
let l1 = (self.cap - next_idx).min(s.len());
unsafe {
self.ptr.offset(next_idx as isize).copy_from(s.as_ptr(), l1);
}
let l2 = s.len() - l1;
unsafe {
self.ptr.copy_from(s.as_ptr().offset(l1 as isize), l2);
}
println!("l1: {}, l2: {}", l1, l2);
self.start = if l2 == 0 {
self.start
} else {
(self.start + self.len + s.len()) % self.cap
};
self.len = (self.len + s.len()).min(self.cap);
}
}
impl Drop for RingBuf {
    /// Releases the backing storage by handing it back to a `Vec`.
    fn drop(&mut self) {
        // Rebuild a zero-length vector over the `cap`-byte allocation and let
        // it go out of scope immediately; that frees the memory behind `ptr`.
        drop(unsafe { Vec::from_raw_parts(self.ptr, 0, self.cap) });
    }
}
impl std::fmt::Debug for RingBuf {
    /// Formats the raw storage and the bookkeeping fields.
    ///
    /// The `ptr` field is rendered as all `cap` bytes of the storage in
    /// physical order, not just the live contents.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // NOTE(review): this views every one of the `cap` bytes, so it relies
        // on the whole allocation having been initialised — confirm against
        // how the storage is created.
        let storage: &[u8] = unsafe { std::slice::from_raw_parts(self.ptr, self.cap) };

        let mut out = f.debug_struct("RingBuf");
        out.field("ptr", &storage);
        out.field("start", &self.start);
        out.field("len", &self.len);
        out.field("cap", &self.cap);
        out.finish()
    }
}
impl std::fmt::Display for RingBuf {
    /// Writes the buffered text in logical order, oldest first.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let (head, straddling, tail) = self.unicode_chunks();

        f.write_str(head)?;
        // A character split across the wrap point arrives decoded on its own.
        match straddling {
            Some(ch) => f.write_fmt(format_args!("{}", ch))?,
            None => {}
        }
        f.write_str(tail)
    }
}
// taken from stdlib https://doc.rust-lang.org/src/core/str/mod.rs.html#193-205
/// Returns `true` when `b` can begin a UTF-8 encoded character, i.e. when it
/// is not a continuation byte of the form `0b10xx_xxxx`.
fn is_char_boundary(b: u8) -> bool {
    const TOP_TWO_BITS: u8 = 0b1100_0000;
    const CONTINUATION_TAG: u8 = 0b1000_0000;
    b & TOP_TWO_BITS != CONTINUATION_TAG
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment