Skip to content

Instantly share code, notes, and snippets.

@jsimmons
Created April 10, 2017 09:01
Show Gist options
  • Save jsimmons/e5e8eeb3f628ebaaaa942c841da98bcf to your computer and use it in GitHub Desktop.
Save jsimmons/e5e8eeb3f628ebaaaa942c841da98bcf to your computer and use it in GitHub Desktop.
A half baked rust version of Hybernating Rhinos' interview question. https://ayende.com/blog/174049/the-low-level-interview-question
use std::fs::File;
use std::io::BufReader;
use std::io::Read;
use std::io::Write;
use std::result;
use std::mem::transmute;
use std::mem::size_of;
const TRIE_SIZE: usize = 32768;
#[derive(Debug)]
enum TrieError {
NotEnoughSpace,
}
type Result = result::Result<(), TrieError>;
struct TrieHeader {
// grows forwards
node_top: u16,
// grows backwards
data_top: u16,
}
struct TrieNode {
character: u8,
data_pos: u16,
child: u16,
next: u16,
}
pub struct Trie {
buffer: [u8; TRIE_SIZE],
}
impl Trie {
#[inline]
fn mut_data_i64(&mut self, index: u16) -> &mut i64 {
let data_ptr = self.buffer.as_mut_ptr();
unsafe { &mut *transmute::<*mut u8, *mut i64>(data_ptr.offset(index as isize)) }
}
#[inline]
fn mut_node(&mut self, index: u16) -> &mut TrieNode {
let data_ptr = self.buffer.as_mut_ptr();
unsafe { &mut *transmute::<*mut u8, *mut TrieNode>(data_ptr.offset(index as isize)) }
}
#[inline]
fn mut_header(&mut self) -> &mut TrieHeader {
let data_ptr = self.buffer.as_mut_ptr();
unsafe { &mut *transmute::<*mut u8, *mut TrieHeader>(data_ptr) }
}
#[inline]
fn data_i64(&self, index: u16) -> &i64 {
let data_ptr = self.buffer.as_ptr();
unsafe { &*transmute::<*const u8, *const i64>(data_ptr.offset(index as isize)) }
}
#[inline]
fn node(&self, index: u16) -> &TrieNode {
let data_ptr = self.buffer.as_ptr();
unsafe { &*transmute::<*const u8, *const TrieNode>(data_ptr.offset(index as isize)) }
}
#[inline]
fn header(&self) -> &TrieHeader {
let data_ptr = self.buffer.as_ptr();
unsafe { &*transmute::<*const u8, *const TrieHeader>(data_ptr) }
}
fn new() -> Trie {
let mut trie = Trie { buffer: [0; TRIE_SIZE] };
{
let mut header = trie.mut_header();
header.node_top = Trie::root();
header.data_top = TRIE_SIZE as u16;
}
trie
}
#[inline]
fn root() -> u16 {
size_of::<TrieHeader>() as u16
}
#[inline]
fn empty(&self) -> bool {
self.header().node_top == Trie::root()
}
#[inline]
fn available_bytes(&self) -> usize {
let header = self.header();
(header.data_top - header.node_top) as usize
}
fn alloc_node(&mut self) -> u16 {
let mut header = self.mut_header();
let new_idx = header.node_top;
header.node_top = new_idx + size_of::<TrieNode>() as u16;
assert!(header.node_top <= header.data_top);
new_idx
}
fn alloc_data(&mut self) -> u16 {
let mut header = self.mut_header();
header.data_top = header.data_top - size_of::<i64>() as u16;
assert!(header.data_top > header.node_top);
header.data_top
}
fn insert(&mut self, key: &str, value: i64) -> Result {
let bytes = key.as_bytes();
let mut i = 0;
let n = bytes.len();
let mut node_idx = Trie::root();
let mut node_next = node_idx;
if !self.empty() {
'outer: while i < n {
let byte = bytes[i];
loop {
node_idx = node_next;
let node = self.node(node_idx);
if node.character == byte {
i = i + 1;
if node.child == 0 {
// end of the child chain, extend
break 'outer;
} else {
node_next = node.child;
break;
}
}
if node.next == 0 {
// end of the next chain, extend
break 'outer;
}
node_next = node.next;
}
}
}
// calc size needed
// ignore alignment because YOLOSWAG
let required_chars = n - i;
let required_bytes = required_chars * size_of::<TrieNode>() + size_of::<i64>();
if required_bytes > self.available_bytes() {
return Err(TrieError::NotEnoughSpace);
}
/*
h e l l o, w o r l d
|
k a p p a
|
b o r k e
*/
if i < n {
let empty = self.empty();
let new_node_idx = self.alloc_node();
if empty {
// add the root node
let mut root = self.mut_node(new_node_idx);
root.character = bytes[i];
} else {
{
// maybe chain a next node
let mut node = self.mut_node(node_idx);
if node.child == 0 {
node.child = new_node_idx;
} else {
assert_eq!(node.next, 0);
node.next = new_node_idx;
}
}
let mut new_node = self.mut_node(new_node_idx);
new_node.character = bytes[i];
}
node_idx = new_node_idx;
i = i + 1;
}
while i < n {
// add trailing children
let new_node_idx = self.alloc_node();
{
let mut node = self.mut_node(node_idx);
assert_eq!(node.child, 0);
node.child = new_node_idx;
}
let mut new_node = self.mut_node(new_node_idx);
new_node.character = bytes[i];
node_idx = new_node_idx;
i = i + 1;
}
// add data
let data_idx = self.alloc_data();
{
let mut data_node = self.mut_node(node_idx);
assert_eq!(data_node.character as char, bytes[n - 1] as char);
data_node.data_pos = data_idx;
}
let mut data = self.mut_data_i64(data_idx);
*data = value;
Ok(())
}
fn lookup(&self, key: &str) -> Option<i64> {
if self.empty() {
return None;
}
let mut node_idx = Trie::root();
let mut node_next = node_idx;
let bytes = key.as_bytes();
let mut i = 0;
let n = bytes.len();
'outer: while i < n {
let byte = bytes[i];
loop {
node_idx = node_next;
let node = self.node(node_idx);
if node.character == byte {
i = i + 1;
node_next = node.child;
continue 'outer;
}
node_next = node.next;
if node_next == 0 {
return None;
}
}
}
if node_idx == 0 {
return None;
}
let node = self.node(node_idx);
if node.data_pos != 0 {
return Some(*self.data_i64(node.data_pos));
}
None
}
fn delete(&mut self, key: &str) {
if self.empty() {
return;
}
let mut node_idx = Trie::root();
let mut node_next = node_idx;
let bytes = key.as_bytes();
let mut i = 0;
let n = bytes.len();
while i < n {
let byte = bytes[i];
loop {
node_idx = node_next;
let node = self.node(node_idx);
if node.character == byte {
i = i + 1;
node_next = node.child;
break;
}
node_next = node.next;
if node_next == 0 {
return;
}
}
}
if node_idx == 0 {
return;
}
let mut node = self.mut_node(node_idx);
node.data_pos = 0
}
fn save(&self, filename: &str) {
let mut file = File::create(filename).unwrap();
file.write_all(&self.buffer).unwrap();
file.sync_all().unwrap();
}
fn load(&mut self, filename: &str) {
let file = File::open(filename).unwrap();
let mut reader = BufReader::new(file);
reader.read_exact(&mut self.buffer).unwrap();
}
}
fn main() {
let mut trie = Trie::new();
assert_eq!(trie.empty(), true);
trie.insert("he", 2).unwrap();
assert_eq!(trie.empty(), false);
trie.insert("h", 1).unwrap();
trie.insert("hel", 3).unwrap();
trie.insert("hell", 4).unwrap();
trie.insert("hello", 5).unwrap();
trie.insert("hello, world", 1234).unwrap();
trie.insert("hello, kappa", 3222).unwrap();
trie.insert("hello, wibwa", 4321).unwrap();
trie.insert("sporky donkey", 1337).unwrap();
trie.insert("sporky", 1337).unwrap();
trie.save("trie.bin");
let mut trie_load = Trie::new();
trie_load.load("trie.bin");
// 0 32688 1337
assert_eq!(trie_load.lookup("he"), Some(2));
assert_eq!(trie_load.lookup("hello, world"), Some(1234));
assert_eq!(trie_load.lookup("h"), Some(1));
assert_eq!(trie_load.lookup("hello, wibwa"), Some(4321));
assert_eq!(trie_load.lookup("hello, kappa"), Some(3222));
trie_load.delete("hello, world");
assert_eq!(trie_load.lookup("hello, world"), None);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment