Last active
June 8, 2024 13:42
-
-
Save rob-p/cadc9bd2055a3464aca48cdea7e4ab47 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use anyhow; | |
use ndarray::Array2; | |
use std::fs; | |
use std::cmp::Ordering; | |
fn naive_table(text: &str) -> Vec<u32> { | |
let text = text.as_bytes(); | |
assert!(text.len() <= u32::MAX as usize); | |
let mut table = vec![0u32; text.len()]; | |
for (i, element) in table.iter_mut().enumerate() { | |
*element = i as u32; | |
} | |
table.sort_by(|&a, &b| text[a as usize..].cmp(&text[b as usize..])); | |
table | |
} | |
struct BTreeState<'a> { | |
nblocks: usize, | |
B: usize, | |
t: usize, | |
n: usize, | |
a: &'a [u32], | |
} | |
impl<'a> BTreeState<'a> { | |
fn new(n: usize, b: usize, a: &'a [u32]) -> Self { | |
let B = b; | |
let nblocks = (n + B - 1) / B; | |
BTreeState { | |
nblocks, | |
B, | |
t: 0, | |
n, | |
a: &a, | |
} | |
} | |
fn search(&self, x: u32, btree: &Array2<u32>) -> u32 { | |
let mut k = 0; | |
let mut res = PLACEHOLDER; | |
loop { | |
'start: { | |
while k < self.nblocks { | |
for i in 0..self.B { | |
if btree[[k, i]] >= x { | |
res = btree[[k, i]]; | |
let oldk = k; | |
k = go(&self, k, i); | |
println!(r"res = {res}, k = {oldk}, i = {i}, newk = {k}"); | |
break 'start; | |
} | |
} | |
} | |
k = go(&self, k, self.B); | |
} | |
if k >= self.nblocks { break; } | |
} | |
res | |
} | |
fn search_fast(&self, x: &str, btree: &Array2<u32>, s: &str) -> u32 { | |
let mut k = 0; | |
let mut res = PLACEHOLDER; | |
while k < self.nblocks { | |
let mut mask: usize = 1 << self.B; | |
for i in 0..self.B { | |
mask |= match s[btree[[k, i]] as usize..].cmp(x) { | |
Ordering::Greater | Ordering::Equal => { | |
1 | |
}, | |
Ordering::Less => { | |
0 | |
}, | |
} << i; | |
} | |
let i = mask.trailing_zeros() as usize; | |
if i < self.B { | |
res = btree[[k, i]] | |
} | |
k = self.get_offset(k, i); | |
} | |
res | |
} | |
fn get_offset(&self, k: usize, i: usize) -> usize { | |
k * (self.B + 1) + i + 1 | |
} | |
} | |
const PLACEHOLDER: u32 = u32::MAX; | |
fn go(state: &BTreeState, k: usize, i: usize) -> usize { | |
k * (state.B + 1) + i + 1 | |
} | |
fn build(state: &mut BTreeState, k: usize, btree: &mut Array2<u32>) { | |
if k < state.nblocks { | |
for i in 0..state.B { | |
build(state, state.get_offset(k, i), btree); | |
btree[[k, i]] = if state.t < state.n { | |
let r = state.a[state.t]; | |
state.t += 1; | |
r | |
} else { | |
PLACEHOLDER | |
}; | |
} | |
let idx = state.get_offset(k, state.B); | |
build(state, idx, btree); | |
} | |
} | |
fn main() { | |
let s = "the quick brown fox was quick.$"; | |
let a = naive_table(&s); | |
const B: usize = 4; | |
let mut state = BTreeState::new(a.len(), B, &a); | |
let mut btree = Array2::zeros((state.nblocks, B)); | |
build(&mut state, 0, &mut btree); | |
println!("btree =\n{:?}", btree); | |
let p = "quick#"; | |
let r2 = state.search_fast(&p, &btree, &s); | |
let r = r2; | |
println!("search for {:?} yields {}, fast search yields {}", p, r, r2); | |
println!("{}", &s[r2 as usize..]); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment