Created
November 12, 2015 01:18
-
-
Save anonymous/61ae8b305e6abff3291c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "fst-int" | |
version = "0.1.0" | |
authors = ["Andrew Gallant <[email protected]>"] | |
[dependencies] | |
byteorder = "0.4" | |
fst = "0.1" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
extern crate byteorder; | |
extern crate fst; | |
use byteorder::{ByteOrder, BigEndian}; | |
use fst::{IntoStreamer, Streamer}; | |
fn main() { | |
let primes = sieve(100_000_000); | |
println!("# primes: {}, size: {} bytes", primes.len(), primes.len() * 4); | |
let fst = FstIntSet::from_vec(primes); | |
println!("FST size: {} bytes", fst.size()); | |
// Very quickly query for whether an integer is prime. | |
println!("prime? {} => {}", 1, fst.contains(1)); | |
println!("prime? {} => {}", 2, fst.contains(2)); | |
println!("prime? {} => {}", 999613, fst.contains(999613)); | |
println!("prime? {} => {}", 999614, fst.contains(999614)); | |
println!("prime? {} => {}", 9999973, fst.contains(9999973)); | |
// Range queries are totally cool because big-endian preserves | |
// integer ordering. If we encoded integers as little-endian, this would | |
// not work! | |
let (start, end) = (999_800, 1_000_000); | |
println!("primes in range {}-{}", start, end); | |
let mut stream = fst.range(start, end); | |
while let Some(key) = stream.next() { | |
let n = BigEndian::read_u32(&key); | |
println!("{}", n); | |
} | |
} | |
struct FstIntSet { | |
set: fst::Set, | |
} | |
impl FstIntSet { | |
fn from_vec(mut nums: Vec<u32>) -> FstIntSet { | |
nums.sort(); | |
nums.dedup(); | |
let mut build = fst::SetBuilder::memory(); | |
for n in nums { | |
let mut buf = [0; 4]; | |
BigEndian::write_u32(&mut buf, n); | |
build.insert(buf).unwrap(); | |
} | |
let bytes = build.into_inner().unwrap(); | |
FstIntSet { | |
set: fst::Set::from_bytes(bytes).unwrap(), | |
} | |
} | |
fn contains(&self, n: u32) -> bool { | |
let mut buf = [0; 4]; | |
BigEndian::write_u32(&mut buf, n); | |
self.set.contains(buf) | |
} | |
fn range(&self, start: u32, end: u32) -> fst::set::Stream { | |
let (mut bstart, mut bend) = ([0; 4], [0; 4]); | |
BigEndian::write_u32(&mut bstart, start); | |
BigEndian::write_u32(&mut bend, end); | |
self.set.range().ge(bstart).lt(bend).into_stream() | |
} | |
fn size(&self) -> usize { | |
self.set.as_fst().size() | |
} | |
} | |
fn sieve(n: usize) -> Vec<u32> { | |
if n <= 1 { | |
return vec![]; | |
} | |
// Taken from: | |
// https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes#Algorithm_and_variants | |
let mut marked = vec![true; n+1]; | |
marked[0] = false; | |
marked[1] = false; | |
for i in 2..((n as f64).sqrt().ceil() as usize) { | |
if marked[i] { | |
let mut j = i * i; | |
let mut k = 0; | |
while j <= n { | |
marked[j] = false; | |
k += 1; | |
j = (i * i) + k * i; | |
} | |
} | |
} | |
marked.iter() | |
.enumerate() | |
.filter_map(|(i, &m)| if !m { None } else { Some(i as u32) }) | |
.collect() | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment