Skip to content

Instantly share code, notes, and snippets.

@sahandevs
Created April 11, 2023 08:26
Show Gist options
  • Save sahandevs/5cec56485e44034b6a60e58f742fa221 to your computer and use it in GitHub Desktop.
Save sahandevs/5cec56485e44034b6a60e58f742fa221 to your computer and use it in GitHub Desktop.
regex in shared memory
use std::{
alloc::{GlobalAlloc, Layout, System},
ptr::null_mut,
sync::atomic::{AtomicUsize, Ordering::SeqCst},
};
/// Global allocator front-end that forwards each request to whichever
/// backend `state` currently selects.
struct AllocatorSwitch {
/// Backend used for allocation and deallocation requests.
state: State,
}
/// Backend selection for `AllocatorSwitch`.
pub enum State {
/// Forward every request to `std::alloc::System`.
SystemAllocator,
/// Carve allocations out of the caller-provided memory region.
SharedMemory(SharedMemory),
}
/// A caller-provided memory region plus the bookkeeping used to hand out
/// pieces of it.
pub struct SharedMemory {
/// Base address of the region; allocation offsets are added to this pointer.
pub shm: *mut u8,
/// Offset below which the region is still unallocated. Each allocation
/// lowers it by the request size and then rounds it down to the alignment.
pub remaining: AtomicUsize,
}
/// Largest alignment the shared-memory backend accepts; requests with a
/// larger alignment get a null pointer.
// NOTE(review): 4096 presumably mirrors the page alignment of the mapping's
// base address — confirm against how `shm` is obtained.
const MAX_SUPPORTED_ALIGN: usize = 4096;
// `State` is not automatically `Sync` because `SharedMemory` holds a raw
// `*mut u8`; this impl asserts by hand that sharing `&State` between threads
// is acceptable.
// NOTE(review): the only shared mutation visible through `&State` is the
// atomic `remaining` counter — confirm no other aliasing of `shm` exists.
unsafe impl Sync for State {}
/// Installs `state` as the backend of the global allocator and returns the
/// backend that was active before the call.
///
/// NOTE(review): the write goes straight to the `static mut` allocator with
/// no synchronization, so this presumably must only be called while no other
/// thread is allocating or switching — confirm with callers.
pub fn switch(state: State) -> State {
    // SAFETY: relies on the caller guaranteeing exclusive access to `GLOBAL`
    // for the duration of the replacement; nothing here enforces it.
    unsafe { std::mem::replace(&mut GLOBAL.state, state) }
}
unsafe impl GlobalAlloc for AllocatorSwitch {
    /// Allocates memory for `layout` from the currently selected backend.
    ///
    /// * `State::SystemAllocator` forwards to `System.alloc`.
    /// * `State::SharedMemory` bumps downward: `remaining` is lowered by the
    ///   request size and rounded down to the requested alignment, and the
    ///   resulting offset from `shm` is returned.
    ///
    /// Returns a null pointer when the alignment exceeds
    /// `MAX_SUPPORTED_ALIGN` or the region has fewer than `layout.size()`
    /// bytes left. Failure is reported only through the null return: printing
    /// from here could itself allocate or panic, and a global allocator must
    /// never unwind.
    ///
    /// Offsets are aligned relative to `shm`, so the returned pointer is only
    /// correctly aligned if `shm` itself is aligned to at least
    /// `layout.align()`.
    unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
        match &self.state {
            State::SystemAllocator => System.alloc(layout),
            State::SharedMemory(region) => {
                let size = layout.size();
                let align = layout.align();
                if align > MAX_SUPPORTED_ALIGN {
                    return null_mut();
                }
                // `Layout` guarantees `align` is a non-zero power of two, so
                // this mask clears exactly the low bits below the alignment.
                let round_down_mask = !(align - 1);
                let claimed = region.remaining.fetch_update(SeqCst, SeqCst, |remaining| {
                    // `None` aborts the update when the request does not fit.
                    remaining
                        .checked_sub(size)
                        .map(|lowered| lowered & round_down_mask)
                });
                match claimed {
                    Ok(previous) => {
                        // `fetch_update` yields the value it replaced; the
                        // closure accepted it, so `previous >= size` holds and
                        // repeating the computation gives the stored offset.
                        let offset = (previous - size) & round_down_mask;
                        // SAFETY: `offset` is below the previous `remaining`,
                        // which the creator of the region must keep within
                        // the bounds of the memory behind `shm`.
                        region.shm.add(offset)
                    }
                    Err(_) => null_mut(),
                }
            }
        }
    }

    /// Releases `ptr` according to the backend that is active *now*.
    ///
    /// The system backend forwards to `System.dealloc`; the shared-memory
    /// backend never reclaims space, so the call is a no-op there.
    ///
    /// NOTE(review): the backend is chosen from the current state, not from
    /// where `ptr` came from. A pointer handed out by the shared-memory
    /// backend must therefore not be freed after switching back to the system
    /// allocator; the region does not record its capacity, so this cannot be
    /// detected here.
    unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
        match &self.state {
            State::SystemAllocator => System.dealloc(ptr, layout),
            State::SharedMemory(_) => {}
        }
    }
}
/// The process-wide allocator. It starts out forwarding to the system
/// allocator; `switch` swaps its `state` in place, which is why it is
/// declared `static mut`.
#[global_allocator]
static mut GLOBAL: AllocatorSwitch = AllocatorSwitch {
state: State::SystemAllocator,
};
# Cargo manifest for the shared-memory regex demo.
[package]
name = "regex_shared_memory"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# Provides `fork::fork()`, which `main` uses to spawn the child process.
fork = "0.1.21"
# Provides `mmap` and the PROT_*/MAP_* flags used to create the shared mapping.
libc = "0.2.141"
# Provides `Regex`, the value that is built inside the shared mapping.
regex = "1.7.3"
# NOTE(review): no use of this crate is visible in the accompanying sources —
# confirm it is still needed.
shared_memory = "0.12.4"
pub mod alloc;
use std::sync::atomic::AtomicUsize;
use regex::{Regex, RegexBuilder};
/// Haystack scanned by the date regex in `main`; it contains two
/// `YYYY-MM-DD` dates.
const TO_SEARCH: &str = "
On 2010-03-14, foo happened. On 2014-10-14, bar happened.
";
/// NOTE(review): not referenced by the visible code — presumably a leftover
/// name for a named shared-memory object; confirm before removing.
const NAME: &str = "/tmp/regex-7";
/// Builds a `Regex` while the global allocator is pointed at a shared
/// anonymous mapping, switches back to the system allocator, then forks: the
/// child runs the regex ten times over `TO_SEARCH` and prints each date, while
/// the parent sleeps forever.
///
/// # Panics
///
/// Panics if the mapping cannot be created, if the regex fails to compile, or
/// if `fork` fails.
fn main() {
    // Size in bytes of the shared mapping; also the allocator's initial budget.
    const SHM_SIZE: usize = 1000000;

    let protection_flags = libc::PROT_READ | libc::PROT_WRITE;
    let visibility_flags = libc::MAP_SHARED | libc::MAP_ANONYMOUS;
    // SAFETY: requests a fresh anonymous mapping (null hint, no file
    // descriptor); the result is checked against `MAP_FAILED` before use.
    let shm = unsafe {
        libc::mmap(
            std::ptr::null_mut(),
            SHM_SIZE,
            protection_flags,
            visibility_flags,
            -1,
            0,
        )
    };
    if shm == libc::MAP_FAILED {
        // Without this check the allocator would hand out offsets from an
        // invalid base address.
        panic!(
            "mmap of {} bytes failed: {}",
            SHM_SIZE,
            std::io::Error::last_os_error()
        );
    }
    println!("ptr: {:p}", shm);

    alloc::switch(alloc::State::SharedMemory(alloc::SharedMemory {
        remaining: AtomicUsize::new(SHM_SIZE),
        shm: shm as _,
    }));
    // Everything allocated from here until the next `switch` lives in the
    // mapping. `ManuallyDrop` keeps the box from ever being dropped — also on
    // panic/unwind — because freeing shared-memory pointers through the
    // system allocator would be invalid.
    let re = std::mem::ManuallyDrop::new(Box::new(
        Regex::new(r"(\d{4})-(\d{2})-(\d{2})").unwrap(),
    ));
    let old = alloc::switch(alloc::State::SystemAllocator);

    match fork::fork() {
        Ok(fork::Fork::Child) => {
            for _ in 0..10 {
                for caps in re.captures_iter(TO_SEARCH) {
                    // Note that all of the unwraps are actually OK for this regex
                    // because the only way for the regex to match is if all of the
                    // capture groups match. This is not true in general though!
                    println!(
                        "year: {}, month: {}, day: {}",
                        caps.get(1).unwrap().as_str(),
                        caps.get(2).unwrap().as_str(),
                        caps.get(3).unwrap().as_str()
                    );
                }
            }
        }
        Ok(fork::Fork::Parent(_)) => loop {
            std::thread::sleep(std::time::Duration::from_secs(10));
            // Deliberate no-op: it only keeps `old` referenced inside the loop.
            match &old {
                alloc::State::SystemAllocator => {}
                alloc::State::SharedMemory(_) => {}
            }
        },
        Err(_) => panic!("fork failed: {}", std::io::Error::last_os_error()),
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment