Skip to content

Instantly share code, notes, and snippets.

Created November 12, 2015 01:18
Show Gist options
  • Save anonymous/61ae8b305e6abff3291c to your computer and use it in GitHub Desktop.
Save anonymous/61ae8b305e6abff3291c to your computer and use it in GitHub Desktop.
[package]
name = "fst-int"
version = "0.1.0"
authors = ["Andrew Gallant <[email protected]>"]
[dependencies]
byteorder = "0.4"
fst = "0.1"
extern crate byteorder;
extern crate fst;
use byteorder::{ByteOrder, BigEndian};
use fst::{IntoStreamer, Streamer};
fn main() {
let primes = sieve(100_000_000);
println!("# primes: {}, size: {} bytes", primes.len(), primes.len() * 4);
let fst = FstIntSet::from_vec(primes);
println!("FST size: {} bytes", fst.size());
// Very quickly query for whether an integer is prime.
println!("prime? {} => {}", 1, fst.contains(1));
println!("prime? {} => {}", 2, fst.contains(2));
println!("prime? {} => {}", 999613, fst.contains(999613));
println!("prime? {} => {}", 999614, fst.contains(999614));
println!("prime? {} => {}", 9999973, fst.contains(9999973));
// Range queries are totally cool because big-endian preserves
// integer ordering. If we encoded integers as little-endian, this would
// not work!
let (start, end) = (999_800, 1_000_000);
println!("primes in range {}-{}", start, end);
let mut stream = fst.range(start, end);
while let Some(key) = stream.next() {
let n = BigEndian::read_u32(&key);
println!("{}", n);
}
}
struct FstIntSet {
set: fst::Set,
}
impl FstIntSet {
fn from_vec(mut nums: Vec<u32>) -> FstIntSet {
nums.sort();
nums.dedup();
let mut build = fst::SetBuilder::memory();
for n in nums {
let mut buf = [0; 4];
BigEndian::write_u32(&mut buf, n);
build.insert(buf).unwrap();
}
let bytes = build.into_inner().unwrap();
FstIntSet {
set: fst::Set::from_bytes(bytes).unwrap(),
}
}
fn contains(&self, n: u32) -> bool {
let mut buf = [0; 4];
BigEndian::write_u32(&mut buf, n);
self.set.contains(buf)
}
fn range(&self, start: u32, end: u32) -> fst::set::Stream {
let (mut bstart, mut bend) = ([0; 4], [0; 4]);
BigEndian::write_u32(&mut bstart, start);
BigEndian::write_u32(&mut bend, end);
self.set.range().ge(bstart).lt(bend).into_stream()
}
fn size(&self) -> usize {
self.set.as_fst().size()
}
}
fn sieve(n: usize) -> Vec<u32> {
if n <= 1 {
return vec![];
}
// Taken from:
// https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes#Algorithm_and_variants
let mut marked = vec![true; n+1];
marked[0] = false;
marked[1] = false;
for i in 2..((n as f64).sqrt().ceil() as usize) {
if marked[i] {
let mut j = i * i;
let mut k = 0;
while j <= n {
marked[j] = false;
k += 1;
j = (i * i) + k * i;
}
}
}
marked.iter()
.enumerate()
.filter_map(|(i, &m)| if !m { None } else { Some(i as u32) })
.collect()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment