Created
January 24, 2017 18:29
-
-
Save ayende/13dc2d15563ba04638d2e5c223e4bbe4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate alloc; | |
extern crate libc; | |
use std::mem; | |
use std::ptr; | |
pub struct Trie { | |
data: alloc::raw_vec::RawVec<u8>, | |
} | |
/* | |
The trie format relies on a single allocation of 32KB in size (using RawVec). | |
We manipulate the memory there directly, using unsafe code. | |
The format is: | |
[Trie Header - global information about the trie, number of items, allocations, etc] | |
[Root node (first one after the trie header) | |
Contains the key offset for the portion of the key it owns, this contains just {key_size} bytes with the key | |
Contains the offset to its children | |
This contains first a u16 number of children | |
Then an array of that size of NodeHeader for the children (unsorted right now) | |
] | |
No attempt is made to reclaim memory, we always allocate from the end, and if needed, we'll defrag. | |
*/ | |
#[derive(Debug)] | |
pub enum TrieErrors { | |
NewVal, | |
UpdatedVal, | |
NotEnoughSpace, | |
ValNotFound, | |
RequiresDefrag, | |
KeyTooBig, | |
KeyPrefixOnly, | |
} | |
enum NodeErrors<'a> { | |
ValNotFound, | |
PartialMatch { closest : &'a NodeHeader } | |
} | |
#[derive(Debug)] | |
struct TrieHeader { | |
items: u16, | |
next_alloc: u16, | |
size_used: u16, | |
reserved: u16, | |
} | |
#[derive(Debug)] | |
struct NodeHeader { | |
key_offset: u16, | |
key_size: u8, | |
has_value: bool, | |
children_offset: u16, | |
} | |
struct NodeAndParent<'a> { | |
parent: Option<&'a NodeHeader> , | |
node : &'a NodeHeader, | |
node_index_at_parent: u16 | |
} | |
const TRIE_SIZE: usize = 32 * 1024; | |
impl NodeHeader { | |
fn number_of_children(self: &NodeHeader) -> u16 { | |
unsafe{ | |
let ptr = ((self as *const NodeHeader) as *const u8) | |
.offset(self.children_offset as isize) as *const u16; | |
*ptr | |
} | |
} | |
fn set_number_of_children(self: &NodeHeader, val: u16) { | |
unsafe{ | |
let ptr = ((self as *const NodeHeader) as *const u8) | |
.offset(self.children_offset as isize) as *mut u16; | |
*ptr = val | |
} | |
} | |
fn nth_child(self: &NodeHeader, n: u16) -> &NodeHeader { | |
unsafe{ | |
let child_offset : isize = self.children_offset as isize + (mem::size_of::<u16>() + | |
(mem::size_of::<NodeHeader>() * n as usize) ) as isize; | |
&*(((self as *const NodeHeader) as *const u8).offset(child_offset) as *const NodeHeader) | |
} | |
} | |
fn key(self: &NodeHeader) -> *const u8 { | |
unsafe{ | |
((self as *const NodeHeader) as *const u8).offset(self.key_offset as isize) | |
} | |
} | |
fn val(self: &NodeHeader) -> Option<i64> { | |
if self.has_value == false { | |
return None; | |
} | |
unsafe{ | |
Some(*(((self as *const NodeHeader) as *const u8).offset(mem::size_of::<NodeHeader>() as isize) as *const i64)) | |
} | |
} | |
fn find_match<'a>(self: &'a NodeHeader, parent: Option<&'a NodeHeader>, parent_index: u16, key: &str) -> Result<NodeAndParent, NodeErrors>{ | |
unsafe { | |
let ptr = ((self as *const NodeHeader) as *const u8) | |
.offset(mem::size_of::<NodeHeader>() as isize); | |
if self.key_size as usize > key.len() { | |
// if the key is longer, this will obviously not match | |
return Err(NodeErrors::ValNotFound); | |
} | |
for i in 0..self.key_size { | |
if *ptr.offset(i as isize) != key.as_bytes()[i as usize] { | |
return Err(NodeErrors::ValNotFound); | |
} | |
} | |
// found an exact match, can return this node | |
if self.key_size as usize == key.len() { | |
return Ok(NodeAndParent{parent: parent, node_index_at_parent: parent_index, node: self}) | |
} | |
// found partial match, now need to find to see we can recurse | |
for i in 0..self.number_of_children() { | |
let child = self.nth_child(i); | |
let child_key = child.key(); | |
if *child_key == key.as_bytes()[self.key_size as usize] { | |
return child.find_match(Some(self), i , &key[self.key_size as usize..]); | |
} | |
} | |
Err(NodeErrors::PartialMatch{closest: self}) | |
} | |
} | |
} | |
impl Trie { | |
pub fn new() -> Trie { | |
let vec = alloc::raw_vec::RawVec::with_capacity(TRIE_SIZE); | |
let mut header = vec.ptr() as *mut TrieHeader; | |
unsafe { | |
(*header).items = 0; | |
(*header).next_alloc = mem::size_of::<TrieHeader>() as u16; | |
(*header).size_used = mem::size_of::<TrieHeader>() as u16; | |
} | |
Trie { data: vec } | |
} | |
pub fn write(self: &mut Trie, key: &str, val: i64) -> Result<(), TrieErrors> { | |
if key.len() > u8::max_value() as usize { | |
return Err(TrieErrors::KeyTooBig); | |
} | |
let header = self.trie_header(); | |
let required_size = mem::size_of::<NodeHeader>() + key.len() + mem::size_of::<i64>(); | |
if required_size > TRIE_SIZE { | |
return Err(TrieErrors::NotEnoughSpace); | |
} | |
if (required_size + header.size_used as usize) > TRIE_SIZE { | |
return Err(TrieErrors::NotEnoughSpace); | |
} | |
if (required_size + header.next_alloc as usize) > TRIE_SIZE { | |
return Err(TrieErrors::RequiresDefrag); | |
} | |
unimplemented!() | |
} | |
pub fn read(self: &Trie, key: &str) -> Result<i64, TrieErrors> { | |
let result = try! (self.find_node(key)); | |
match result.node.val() { | |
None => Err(TrieErrors::KeyPrefixOnly), | |
Some(a) => Ok(a) | |
} | |
} | |
fn find_node(self: &Trie, key: &str) -> Result<NodeAndParent, TrieErrors> { | |
let header = self.trie_header(); | |
if header.items == 0 { | |
return Err(TrieErrors::ValNotFound); | |
} | |
let node = self.node_header(mem::size_of::<TrieHeader>() as isize); | |
match node.find_match(None, 0, key) { | |
Err(NodeErrors::ValNotFound) | | |
Err(NodeErrors::PartialMatch{ closest: _}) | |
=> Err(TrieErrors::ValNotFound), | |
Ok(matching_node) => Ok(matching_node) | |
} | |
} | |
pub fn delete(self: &mut Trie, key: &str) -> Result<(), TrieErrors> { | |
{ | |
let result = try! (self.find_node(key)); | |
if result.node.has_value == false{ | |
return Err(TrieErrors::KeyPrefixOnly); | |
} | |
} | |
self.delete_internal(key); | |
Ok(()) | |
} | |
fn delete_internal(self: &mut Trie, key: &str) { | |
let parent_key_size: usize; | |
let state : u32; | |
{ | |
let result = self.find_node(key).unwrap(); // we are ensured that this cannot fail from our callers | |
parent_key_size = key.len() - result.node.key_size as usize; | |
match result.parent { | |
None => { | |
state = 0; | |
}, | |
Some(parent) => { | |
if parent.number_of_children() == 1 { | |
state = 1; | |
} | |
else { | |
// we need to move it over | |
state = 2 | |
} | |
} | |
} | |
} | |
match state { | |
0 => { | |
// there is just one entry in the trie | |
let mut header = self.mut_trie_header(); | |
header.items = 0; | |
header.next_alloc = mem::size_of::<TrieHeader>() as u16; | |
header.size_used = mem::size_of::<TrieHeader>() as u16; | |
}, | |
1 => { | |
// this has just one entry, so need to remove it as well | |
self.delete_internal(&key[ ..parent_key_size]); | |
} | |
_ => unimplemented!() | |
} | |
} | |
fn mut_trie_header(self: &mut Trie) -> &mut TrieHeader { | |
unsafe { | |
let header = self.data.ptr() as *mut TrieHeader; | |
return &mut *header; | |
} | |
} | |
fn trie_header(self: &Trie) -> &TrieHeader { | |
unsafe { | |
let header = self.data.ptr() as *const TrieHeader; | |
return &*header; | |
} | |
} | |
fn node_header_mut(self: &mut Trie, offset: isize) -> &mut NodeHeader { | |
unsafe { | |
return &mut *(self.data.ptr().offset(offset) as *mut NodeHeader); | |
} | |
} | |
fn node_header(self: &Trie, offset: isize) -> &NodeHeader { | |
unsafe { | |
return &*(self.data.ptr().offset(offset) as *const NodeHeader); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment