Created
August 18, 2016 15:34
-
-
Save ExpHP/c081842d1daf947bba51ce465716a734 to your computer and use it in GitHub Desktop.
segmented sieve
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#![feature(step_by)] | |
#![feature(test)] | |
pub extern crate test; | |
use test::Bencher; | |
// Using a segmented sieve method for factorization. | |
// This has a couple of speed advantages that are not possible to offer | |
// currently through the abstractions present in my `factor` crate. | |
// | |
// * A more efficient method of generating a factorization sieve (which | |
// in `factor` is known as a `ListFactorizer`) that builds it in segments. | |
// * Linear optimizations in the construction of Factorization objects | |
// from the list, which avoid requirements like `HashMap` by taking advantage | |
// of the fact that the factorization sieve we generate always produces the | |
// smallest factor. (hence factors are produced in order, making it easy | |
// to count duplicates) | |
pub type Int = i32; | |
pub const INT_TYPE_BYTES: usize = 4; // ::std::mem::size_of is not a const fn, because lolidunno | |
// The goal is to work on the sieve in segments that fit in L1 cache. | |
// Unfortunately I do not know how best to make this configurable. | |
const L1D_CACHE_SIZE: usize = 32768; | |
pub const SEGMENT_LEN: usize = L1D_CACHE_SIZE / INT_TYPE_BYTES; | |
/// An object for efficiently factorizing numbers. | |
/// | |
/// A `FactorSieve` is simply a precomputed array which stores the smallest | |
/// prime factor `p` for each integer `x`, up to some limit. The remaining | |
/// factors are found recursively by using the sieve to factorize `x / p` | |
/// until `x == 1`. | |
/// | |
/// For any non-composite `x` or zero, `sieve[x] = x`. | |
#[derive(Clone)] | |
pub struct FactorSieve { | |
sieve: Vec<Int>, | |
} | |
impl FactorSieve { | |
/// Compute a factor sieve. | |
pub fn new(limit: Int) -> Self { | |
assert!(limit >= 0); | |
FactorSieve { sieve: factor_sieve_segmented(limit - 1, SEGMENT_LEN) } | |
} | |
/// Factorize a number, one prime factor at a time (with duplicates) | |
/// | |
/// Produces prime factors of `x` one by one in order of increasing magnitude. | |
/// If `p` appears to the power `exp` in the factorization of `x`, then the | |
/// iterator will yield it `exp` times. | |
pub fn prime_iter(&self, x: Int) -> SieveIter { | |
assert!(x >= 0); | |
assert!(x < self.sieve.len() as Int); | |
SieveIter { x: x, sieve: &self.sieve } | |
} | |
/// Factorize a number into `(prime, exp)` pairs. | |
pub fn factorize(&self, x: Int) -> Vec<(Int, usize)> { | |
assert!(x >= 0); | |
// special cases to make life easier | |
if (x == 0) { return vec![(0, 1)]; } | |
if (x == 1) { return vec![]; } | |
let mut iter = self.prime_iter(x); | |
// the rest of this is basically trying to implement something like the | |
// following in iterator-speak (but there's no grouping iterator in std): | |
// iter.group_by(|p| p) | |
// .map(|(p, group)| (p, group.count())) | |
// initialize with first item | |
let mut prev = iter.next().unwrap(); // safe for x != 1 | |
let mut count = 1; | |
// count occurrences | |
let mut out = vec![]; | |
for p in iter { | |
if p != prev { | |
out.push((prev, count)); | |
prev = p; | |
count = 0; | |
} | |
count += 1; | |
} | |
out.push((prev, count)); // final group | |
out | |
} | |
/// Get the smallest prime factor of a number | |
#[inline] | |
pub fn get_prime(&self, x: Int) -> Int { self[x] } | |
pub fn into_vec(self) -> Vec<Int> { self.sieve } | |
pub fn as_vec(&self) -> &Vec<Int> { &self.sieve } | |
pub fn as_vec_mut(&mut self) -> &mut Vec<Int> { &mut self.sieve } | |
} | |
impl ::std::ops::Index<Int> for FactorSieve { | |
type Output = Int; | |
#[inline] | |
fn index(&self, index: Int) -> &Int { &self.sieve[index as usize] } | |
} | |
/// Get prime factors of a number one by one using a sieve. | |
pub struct SieveIter<'a> { | |
x: Int, | |
sieve: &'a Vec<Int>, | |
} | |
impl<'a> Iterator for SieveIter<'a> { | |
type Item = Int; | |
#[inline] | |
fn next(&mut self) -> Option<Int> { | |
match self.x { | |
1 => None, | |
x => { | |
let p = self.sieve[x as usize]; | |
self.x /= p; | |
Some(p) | |
} | |
} | |
} | |
} | |
//------------------------------------------------------ | |
// The implementation is adapted from a segmented Sieve of Eratosthenes | |
// found here: http://primesieve.org/segmented_sieve.html | |
// | |
// The idea in that code is to perform incremental work, using all prime | |
// factors to sieve one region at a time that fits in L1 cache. | |
// However, we do not gain all benefits of the original code due to | |
// fundamental changes in design (we're factorizing all numbers, not just | |
// finding primes), and probably due to other things I failed to consider | |
// properly when adapting the code. | |
// FIXME: make this more idiomatic, and less of a `mut`-fest | |
// FIXME: make this use exclusive limits | |
pub fn factor_sieve_segmented(limit: Int, segment_size: usize) -> Vec<Int> | |
{ | |
// Notice limit is inclusive in this function. | |
// Small cases where no primes have multiples to sieve. | |
debug_assert!(limit >= -1); | |
if limit < 4 { | |
return (0..limit+1).collect(); | |
} | |
// Consider that the smallest `x > p` for which `out[x] == p` is `p*p`. | |
// As a result, any primes for which `p*p > limit` would be pointless to use in | |
// trial division, and do not need to be tracked beyond their first encounter. | |
// | |
// The primes which DO matter are only those up to sqrt(limit), which we can | |
// collect in comparatively no time with a standard sieve of eratosthenes. | |
let primes: Vec<_> = { | |
let mut s_limit = (limit as f64).powf(0.5) as usize; | |
while s_limit * s_limit < limit as usize { s_limit += 1 }; // ensure large enough | |
prime_sieve_simple(s_limit).into_iter() | |
.enumerate().skip(2).filter(|&(_,x)| x) | |
.map(|(p,_)| p) | |
.collect() | |
}; | |
assert_eq!(primes.first(), Some(&2)); // `None` is prevented by special case at top | |
// Workspace containing one segment of the output at a time. | |
// The fill value of 0 is not significant and may as well be uninitialized. | |
let mut workspace = filled_vec(segment_size, 0); | |
let mut sieve = filled_vec((limit + 1) as usize, 0); | |
// Vector which tracks locations of the first multiple of each prime in the next segment. | |
let mut offsets = vec![]; | |
// Iterator for small primes that are not yet tracked | |
let mut new_primes = primes.iter().cloned().peekable(); | |
// Iterate over segments | |
let lows = (0..).step_by(segment_size); | |
for (low, segment) in lows.zip(sieve.chunks_mut(segment_size)) { | |
// Fill default values for this segment, which are equal to the sieve index. | |
// These values will be kept by 0, 1, and any primes. | |
for j in 0..workspace.len() { | |
workspace[j] = (low + j) as Int; | |
} | |
let high = low + segment.len(); | |
// Begin tracking the primes whose first composite is in this segment | |
while let Some(p) = new_primes.peek().cloned() { | |
if p*p >= high { break; } // outside segment | |
new_primes.next(); // consume | |
offsets.push(p*p - low); | |
} | |
// Collect the offsets | |
let mut it = offsets.iter_mut().zip(primes.iter()); | |
// A small wheel optimization: Handle p = 2 separately so we can skip | |
// even multiples of odd primes. | |
if let Some((start2, _)) = it.next() { | |
for i in (*start2..workspace.len()).step_by(2) { | |
workspace[i] = 2; | |
} | |
// FIXME is this right? I wrote this a while ago, and looking | |
// at it now it would seem that I also intended to reduce start2 | |
// modulo something. | |
// Currently segment size is never odd so there is no impact | |
*start2 += (segment_size % 2); | |
} | |
// Sieve the current segment with primes >= 3. | |
// Do them in reverse order so that the smallest ones overwrite larger ones. | |
for (start, &p) in make_double_ended(it).rev() { | |
// The initial value of `start` is `x - low`, where `x` is the first | |
// odd multiple of `p` inside this segment. | |
let mut j = *start; | |
let step = (2*p) as usize; | |
while j < segment_size { | |
workspace[j] = p as Int; | |
j += step; | |
} | |
// j is now equal to the offset of the first odd multiple outside this segment. | |
// Subtract segment_size to account for the change in offset between main loop iterations. | |
// (note: this correctly accounts for the case where p > segment_size) | |
*start = j - segment_size; | |
} | |
copy(&workspace[..], segment); | |
} | |
sieve | |
} | |
// helper to identify small primes | |
// limit is INCLUSIVE | |
fn prime_sieve_simple(limit: usize) -> Vec<bool> { | |
let mut sieve = filled_vec(limit + 1, true); | |
for i in 2.. { | |
if i * i > limit { break } | |
if (sieve[i]) { | |
for j in (i*i..limit+1).step_by(i) { | |
sieve[j] = false; | |
} | |
} | |
} | |
sieve | |
} | |
//---------------------------------------------------- | |
// for benchmarking against the segmented method | |
// limit is EXCLUSIVE | |
fn factor_sieve_simple(limit: Int) -> Vec<Int> { | |
let mut sieve: Vec<_> = (0..limit).collect(); | |
for x in (2..limit).step_by(2) { sieve[x as usize] = 2; } | |
for x in (3..).step_by(2) { | |
let first = x*x; | |
if first > limit { break } | |
if sieve[x as usize] == x { | |
for m in (first..limit).step_by(2*x) { | |
if sieve[m as usize] == m { | |
sieve[m as usize] = x; | |
} | |
} | |
} | |
} | |
sieve | |
} | |
fn make_double_ended<T,I:Iterator<Item=T>>(iter: I) -> ::std::vec::IntoIter<T> { | |
iter.collect::<Vec<_>>().into_iter() | |
} | |
//--------------------------------------- | |
fn filled_vec<T:Clone>(size: usize, value: T) -> Vec<T> { | |
let mut vec = vec![]; | |
vec.resize(size, value); | |
vec | |
} | |
fn fill<T:Clone>(slice: &mut [T], value: T) { | |
for x in slice.iter_mut() { *x = value.clone() } | |
} | |
fn copy<T:Copy>(source: &[T], dest: &mut [T]) { | |
assert!(source.len() >= dest.len()); | |
for i in 0..dest.len() { | |
dest[i] = source[i]; | |
} | |
} | |
#[test] | |
fn test() { | |
// a "definitely correct" sieve formed by trial division. | |
// It is large enough to include values for which various certain | |
// incorrectly-implemented optimizations could give the wrong prime | |
// factor (e.g. a certain easy mistake results in giving 3 for 18, | |
// 5 for 45, and 7 for 175; and the first incorrect value may be pushed | |
// even further back if a large wheel optimization is used) | |
let mut correct = vec![0,1]; | |
for i in 2..2000 { | |
for k in 2..(i+1) { | |
if i % k == 0 { | |
correct.push(k); | |
break; | |
} | |
} | |
} | |
for (i,x) in correct.clone().into_iter().enumerate() { | |
if x == 3 { | |
println!("{}", i); | |
} | |
} | |
// a "definitely definitely correct" sieve to verify that one against, | |
// up to a point | |
assert_eq!(correct.len(), 2000); | |
assert_eq!(&correct[..12], &vec![0,1,2,3,2,5,2,7,2,3,2,11][..]); | |
// try various sizes, in particular: | |
for len in vec![ | |
0, 1, // Obvious edge cases (length 0, 1) | |
2, 3, // More subtle edge case (no multiples to cross off) | |
9, 10, 11, // End in a prime square; a composite; and a prime | |
correct.len(), // Catch erroneous values for larger x | |
] { | |
let actual = FactorSieve::new(len as Int).into_vec(); | |
assert_eq!(&actual[..], &correct[..len]); | |
} | |
} | |
#[bench] | |
fn bench_simple(b: &mut Bencher) { | |
b.iter(|| { | |
let n = 10*SEGMENT_LEN + 20; | |
let n = 100_000_000; | |
factor_sieve_simple(n as Int).last().cloned() | |
}); | |
} | |
#[bench] | |
fn bench_segmented(b: &mut Bencher) { | |
b.iter(|| { | |
let n = 10*SEGMENT_LEN + 20; // extra to force an incomplete segment | |
let n = 100_000_000; | |
FactorSieve::new(n as Int).into_vec().last().cloned() | |
}); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment