ExpHP · August 18, 2016 15:34
diff --git a/factor_sieve.rs b/factor_sieve.rs
 #![feature(step_by)]
 #![feature(test)]
 pub extern crate test;

 use test::Bencher;

 // Using a segmented sieve method for factorization.

 // This has a couple of speed advantages that are not possible to offer
 // currently through the abstractions present in my `factor` crate.
 //
 // * A more efficient method of generating a factorization sieve (which
 //   in `factor` is known as a `ListFactorizer`) that builds it in segments.
 // * Linear optimizations in the construction of Factorization objects
 //   from the list, which avoid requirements like `HashMap` by taking advantage
 //   of the fact that the factorization sieve we generate always produces the
 //   smallest factor. (hence factors are produced in order, making it easy
 //   to count duplicates)

 pub type Int = i32;
 pub const INT_TYPE_BYTES: usize = 4; // ::std::mem::size_of is not a const fn, because lolidunno

 // The goal is to work on the sieve in segments that fit in L1 cache.
 // Unfortunately I do not know how best to make this configurable.
 const L1D_CACHE_SIZE: usize = 32768;
 pub const SEGMENT_LEN: usize = L1D_CACHE_SIZE / INT_TYPE_BYTES;

 /// An object for efficiently factorizing numbers.
 ///
 /// A `FactorSieve` is simply a precomputed array which stores the smallest
 /// prime factor `p` for each integer `x`, up to some limit.  The remaining
 /// factors are found recursively by using the sieve to factorize `x / p`
 /// until `x == 1`.
 ///
 /// For any non-composite `x` or zero, `sieve[x] = x`.
 #[derive(Clone)]
 pub struct FactorSieve {
 	sieve: Vec<Int>,
 }

 impl FactorSieve {
 	/// Compute a factor sieve.
 	pub fn new(limit: Int) -> Self {
 		assert!(limit >= 0);
 		FactorSieve { sieve: factor_sieve_segmented(limit - 1, SEGMENT_LEN) }
 	}

 	/// Factorize a number, one prime factor at a time (with duplicates)
 	///
 	/// Produces prime factors of `x` one by one in order of increasing magnitude.
 	/// If `p` appears to the power `exp` in the factorization of `x`, then the
 	/// iterator will yield it `exp` times.
 	pub fn prime_iter(&self, x: Int) -> SieveIter {
 		assert!(x >= 0);
 		assert!(x < self.sieve.len() as Int);
 		SieveIter { x: x, sieve: &self.sieve }
 	}

 	/// Factorize a number into `(prime, exp)` pairs.
 	pub fn factorize(&self, x: Int) -> Vec<(Int, usize)> {
 		assert!(x >= 0);
 		// special cases to make life easier
 		if (x == 0) { return vec![(0, 1)]; }
 		if (x == 1) { return vec![]; }

 		let mut iter = self.prime_iter(x);

 		// the rest of this is basically trying to implement something like the
 		// following in iterator-speak (but there's no grouping iterator in std):
 		//    iter.group_by(|p| p)
 		//        .map(|(p, group)| (p, group.count()))

 		// initialize with first item
 		let mut prev = iter.next().unwrap(); // safe for x != 1
 		let mut count = 1;

 		// count occurrences
 		let mut out = vec![];
 		for p in iter {
 			if p != prev {
 				out.push((prev, count));
 				prev = p;
 				count = 0;
 			}
 			count += 1;
 		}
 		out.push((prev, count)); // final group
 		out
 	}

 	/// Get the smallest prime factor of a number
 	#[inline]
 	pub fn get_prime(&self, x: Int) -> Int { self[x] }

 	pub fn into_vec(self) -> Vec<Int> { self.sieve }
 	pub fn as_vec(&self) -> &Vec<Int> { &self.sieve }
 	pub fn as_vec_mut(&mut self) -> &mut Vec<Int> { &mut self.sieve }
 }

 impl ::std::ops::Index<Int> for FactorSieve {
 	type Output = Int;
 	#[inline]
 	fn index(&self, index: Int) -> &Int { &self.sieve[index as usize] }
 }

 /// Get prime factors of a number one by one using a sieve.
 pub struct SieveIter<'a> {
 	x: Int,
 	sieve: &'a Vec<Int>,
 }
 impl<'a> Iterator for SieveIter<'a> {
 	type Item = Int;
 	#[inline]
 	fn next(&mut self) -> Option<Int> {
 		match self.x {
 			1 => None,
 			x => {
 				let p = self.sieve[x as usize];
 				self.x /= p;
 				Some(p)
 			}
 		}
 	}
 }


 //------------------------------------------------------

 // The implementation is adapted from a segmented Sieve of Eratosthenes
 // found here: http://primesieve.org/segmented_sieve.html
 //
 // The idea in that code is to perform incremental work, using all prime
 // factors to sieve one region at a time that fits in L1 cache.
 // However, we do not gain all benefits of the original code due to
 // fundamental changes in design (we're factorizing all numbers, not just
 // finding primes), and probably due to other things I failed to consider
 // properly when adapting the code.

 // FIXME: make this more idiomatic, and less of a `mut`-fest
 // FIXME: make this use exclusive limits
 pub fn factor_sieve_segmented(limit: Int, segment_size: usize) -> Vec<Int>
 {
 	// Notice limit is inclusive in this function.

 	// Small cases where no primes have multiples to sieve.
 	debug_assert!(limit >= -1);
 	if limit < 4 {
 		return (0..limit+1).collect();
 	}

 	// Consider that the smallest `x > p` for which `out[x] == p` is `p*p`.
 	// As a result, any primes for which `p*p > limit` would be pointless to use in
 	// trial division, and do not need to be tracked beyond their first encounter.
 	//
 	// The primes which DO matter are only those up to sqrt(limit), which we can
 	// collect in comparatively no time with a standard sieve of eratosthenes.
 	let primes: Vec<_> = {
 		let mut s_limit = (limit as f64).powf(0.5) as usize;
 		while s_limit * s_limit < limit as usize { s_limit += 1 }; // ensure large enough
 		prime_sieve_simple(s_limit).into_iter()
 			.enumerate().skip(2).filter(|&(_,x)| x)
 			.map(|(p,_)| p)
 			.collect()
 	};
 	assert_eq!(primes.first(), Some(&2)); // `None` is prevented by special case at top

 	// Workspace containing one segment of the output at a time.
 	// The fill value of 0 is not significant and may as well be uninitialized.
 	let mut workspace = filled_vec(segment_size, 0);
 	let mut sieve = filled_vec((limit + 1) as usize, 0);

 	// Vector which tracks locations of the first multiple of each prime in the next segment.
 	let mut offsets = vec![];
 	// Iterator for small primes that are not yet tracked
 	let mut new_primes = primes.iter().cloned().peekable();

 	// Iterate over segments
 	let lows = (0..).step_by(segment_size);
 	for (low, segment) in lows.zip(sieve.chunks_mut(segment_size)) {

 		// Fill default values for this segment, which are equal to the sieve index.
 		// These values will be kept by 0, 1, and any primes.
 		for j in 0..workspace.len() {
 			workspace[j] = (low + j) as Int;
 		}
 		let high = low + segment.len();

 		// Begin tracking the primes whose first composite is in this segment
 		while let Some(p) = new_primes.peek().cloned() {
 			if p*p >= high { break; } // outside segment

 			new_primes.next(); // consume
 			offsets.push(p*p - low);
 		}

 		// Collect the offsets
 		let mut it = offsets.iter_mut().zip(primes.iter());
 		

 		// A small wheel optimization: Handle p = 2 separately so we can skip
 		//  even multiples of odd primes.
 		if let Some((start2, _)) = it.next() {
 			for i in (*start2..workspace.len()).step_by(2) {
 				workspace[i] = 2;
 			}
 			// FIXME is this right? I wrote this a while ago, and looking
 			// at it now it would seem that I also intended to reduce start2
 			// modulo something.
 			// Currently segment size is never odd so there is no impact
 			*start2 += (segment_size % 2);
 		}

 		// Sieve the current segment with primes >= 3.
 		// Do them in reverse order so that the smallest ones overwrite larger ones.
 		for (start, &p) in make_double_ended(it).rev() {

 			// The initial value of `start` is `x - low`, where `x` is the first
 			// odd multiple of `p` inside this segment.
 			let mut j = *start;
 			let step = (2*p) as usize;
 			while j < segment_size {
 				workspace[j] = p as Int;
 				j += step;
 			}

 			// j is now equal to the offset of the first odd multiple outside this segment.
 			// Subtract segment_size to account for the change in offset between main loop iterations.
 			// (note: this correctly accounts for the case where p > segment_size)
 			*start = j - segment_size;
 		}
 		copy(&workspace[..], segment);
 	}
 	sieve
 }

 // helper to identify small primes
 // limit is INCLUSIVE
 fn prime_sieve_simple(limit: usize) -> Vec<bool> {
  let mut sieve = filled_vec(limit + 1, true);
  for i in 2.. {
    if i * i > limit { break }
    if (sieve[i]) {
      for j in (i*i..limit+1).step_by(i) {
        sieve[j] = false;
      }
    }
  }
  sieve
 }

 //----------------------------------------------------

 // for benchmarking against the segmented method
 // limit is EXCLUSIVE
 fn factor_sieve_simple(limit: Int) -> Vec<Int> {
 	let mut sieve: Vec<_> = (0..limit).collect();
 	for x in (2..limit).step_by(2) { sieve[x as usize] = 2; }
 	for x in (3..).step_by(2) {
 		let first = x*x;
 		if first > limit { break }
 		if sieve[x as usize] == x {
 			for m in (first..limit).step_by(2*x) {
 				if sieve[m as usize] == m {
 					sieve[m as usize] = x;
 				}
 			}
 		}
 	}
 	sieve
 }

 fn make_double_ended<T,I:Iterator<Item=T>>(iter: I) -> ::std::vec::IntoIter<T> {
 	iter.collect::<Vec<_>>().into_iter()
 }

 //---------------------------------------

 fn filled_vec<T:Clone>(size: usize, value: T) -> Vec<T> {
 	let mut vec = vec![];
 	vec.resize(size, value);
 	vec
 }

 fn fill<T:Clone>(slice: &mut [T], value: T) {
 	for x in slice.iter_mut() { *x = value.clone() }
 }

 fn copy<T:Copy>(source: &[T], dest: &mut [T]) {
 	assert!(source.len() >= dest.len());
 	for i in 0..dest.len() {
 		dest[i] = source[i];
 	}
 }
 #[test]
 fn test() {
 	// a "definitely correct" sieve formed by trial division.
 	// It is large enough to include values for which various certain
 	// incorrectly-implemented optimizations could give the wrong prime
 	// factor (e.g. a certain easy mistake results in giving 3 for 18,
 	// 5 for 45, and 7 for 175; and the first incorrect value may be pushed
 	// even further back if a large wheel optimization is used)
 	let mut correct = vec![0,1];
 	for i in 2..2000 {
 		for k in 2..(i+1) {
 			if i % k == 0 {
 				correct.push(k);
 				break;
 			}
 		}
 	}
 	for (i,x) in correct.clone().into_iter().enumerate() {
 		if x == 3 {
 			println!("{}", i);
 		}
 	}
 	// a "definitely definitely correct" sieve to verify that one against,
 	// up to a point
 	assert_eq!(correct.len(), 2000);
 	assert_eq!(&correct[..12], &vec![0,1,2,3,2,5,2,7,2,3,2,11][..]);

 	// try various sizes, in particular:
 	for len in vec![
 		0, 1,   // Obvious edge cases (length 0, 1)
 		2, 3,   // More subtle edge case (no multiples to cross off)
 		9, 10, 11, // End in a prime square; a composite; and a prime
 		correct.len(), // Catch erroneous values for larger x
 	] {
 		let actual = FactorSieve::new(len as Int).into_vec();
 		assert_eq!(&actual[..], &correct[..len]);
 	}
 }

 #[bench]
 fn bench_simple(b: &mut Bencher) {
 	b.iter(|| {
 		let n = 10*SEGMENT_LEN + 20;
 		let n = 100_000_000;
 		factor_sieve_simple(n as Int).last().cloned()
 	});
 }
 #[bench]
 fn bench_segmented(b: &mut Bencher) {
 	b.iter(|| {
 		let n = 10*SEGMENT_LEN + 20; // extra to force an incomplete segment
 		let n = 100_000_000;
 		FactorSieve::new(n as Int).into_vec().last().cloned()
 	});
 }
	#![feature(step_by)]
	#![feature(test)]
	pub extern crate test;

	use test::Bencher;

	// Using a segmented sieve method for factorization.

	// This has a couple of speed advantages that are not possible to offer
	// currently through the abstractions present in my `factor` crate.
	//
	// * A more efficient method of generating a factorization sieve (which
	// in `factor` is known as a `ListFactorizer`) that builds it in segments.
	// * Linear optimizations in the construction of Factorization objects
	// from the list, which avoid requirements like `HashMap` by taking advantage
	// of the fact that the factorization sieve we generate always produces the
	// smallest factor. (hence factors are produced in order, making it easy
	// to count duplicates)

	pub type Int = i32;
	pub const INT_TYPE_BYTES: usize = 4; // ::std::mem::size_of is not a const fn, because lolidunno

	// The goal is to work on the sieve in segments that fit in L1 cache.
	// Unfortunately I do not know how best to make this configurable.
	const L1D_CACHE_SIZE: usize = 32768;
	pub const SEGMENT_LEN: usize = L1D_CACHE_SIZE / INT_TYPE_BYTES;

	/// An object for efficiently factorizing numbers.
	///
	/// A `FactorSieve` is simply a precomputed array which stores the smallest
	/// prime factor `p` for each integer `x`, up to some limit. The remaining
	/// factors are found recursively by using the sieve to factorize `x / p`
	/// until `x == 1`.
	///
	/// For any non-composite `x` or zero, `sieve[x] = x`.
	#[derive(Clone)]
	pub struct FactorSieve {
	sieve: Vec<Int>,
	}

	impl FactorSieve {
	/// Compute a factor sieve.
	pub fn new(limit: Int) -> Self {
	assert!(limit >= 0);
	FactorSieve { sieve: factor_sieve_segmented(limit - 1, SEGMENT_LEN) }
	}

	/// Factorize a number, one prime factor at a time (with duplicates)
	///
	/// Produces prime factors of `x` one by one in order of increasing magnitude.
	/// If `p` appears to the power `exp` in the factorization of `x`, then the
	/// iterator will yield it `exp` times.
	pub fn prime_iter(&self, x: Int) -> SieveIter {
	assert!(x >= 0);
	assert!(x < self.sieve.len() as Int);
	SieveIter { x: x, sieve: &self.sieve }
	}

	/// Factorize a number into `(prime, exp)` pairs.
	pub fn factorize(&self, x: Int) -> Vec<(Int, usize)> {
	assert!(x >= 0);
	// special cases to make life easier
	if (x == 0) { return vec![(0, 1)]; }
	if (x == 1) { return vec![]; }

	let mut iter = self.prime_iter(x);

	// the rest of this is basically trying to implement something like the
	// following in iterator-speak (but there's no grouping iterator in std):
	// iter.group_by(\|p\| p)
	// .map(\|(p, group)\| (p, group.count()))

	// initialize with first item
	let mut prev = iter.next().unwrap(); // safe for x != 1
	let mut count = 1;

	// count occurrences
	let mut out = vec![];
	for p in iter {
	if p != prev {
	out.push((prev, count));
	prev = p;
	count = 0;
	}
	count += 1;
	}
	out.push((prev, count)); // final group
	out
	}

	/// Get the smallest prime factor of a number
	#[inline]
	pub fn get_prime(&self, x: Int) -> Int { self[x] }

	pub fn into_vec(self) -> Vec<Int> { self.sieve }
	pub fn as_vec(&self) -> &Vec<Int> { &self.sieve }
	pub fn as_vec_mut(&mut self) -> &mut Vec<Int> { &mut self.sieve }
	}

	impl ::std::ops::Index<Int> for FactorSieve {
	type Output = Int;
	#[inline]
	fn index(&self, index: Int) -> &Int { &self.sieve[index as usize] }
	}

	/// Get prime factors of a number one by one using a sieve.
	pub struct SieveIter<'a> {
	x: Int,
	sieve: &'a Vec<Int>,
	}
	impl<'a> Iterator for SieveIter<'a> {
	type Item = Int;
	#[inline]
	fn next(&mut self) -> Option<Int> {
	match self.x {
	1 => None,
	x => {
	let p = self.sieve[x as usize];
	self.x /= p;
	Some(p)
	}
	}
	}
	}


	//------------------------------------------------------

	// The implementation is adapted from a segmented Sieve of Eratosthenes
	// found here: http://primesieve.org/segmented_sieve.html
	//
	// The idea in that code is to perform incremental work, using all prime
	// factors to sieve one region at a time that fits in L1 cache.
	// However, we do not gain all benefits of the original code due to
	// fundamental changes in design (we're factorizing all numbers, not just
	// finding primes), and probably due to other things I failed to consider
	// properly when adapting the code.

	// FIXME: make this more idiomatic, and less of a `mut`-fest
	// FIXME: make this use exclusive limits
	pub fn factor_sieve_segmented(limit: Int, segment_size: usize) -> Vec<Int>
	{
	// Notice limit is inclusive in this function.

	// Small cases where no primes have multiples to sieve.
	debug_assert!(limit >= -1);
	if limit < 4 {
	return (0..limit+1).collect();
	}

	// Consider that the smallest `x > p` for which `out[x] == p` is `p*p`.
	// As a result, any primes for which `p*p > limit` would be pointless to use in
	// trial division, and do not need to be tracked beyond their first encounter.
	//
	// The primes which DO matter are only those up to sqrt(limit), which we can
	// collect in comparatively no time with a standard sieve of eratosthenes.
	let primes: Vec<_> = {
	let mut s_limit = (limit as f64).powf(0.5) as usize;
	while s_limit * s_limit < limit as usize { s_limit += 1 }; // ensure large enough
	prime_sieve_simple(s_limit).into_iter()
	.enumerate().skip(2).filter(\|&(_,x)\| x)
	.map(\|(p,_)\| p)
	.collect()
	};
	assert_eq!(primes.first(), Some(&2)); // `None` is prevented by special case at top

	// Workspace containing one segment of the output at a time.
	// The fill value of 0 is not significant and may as well be uninitialized.
	let mut workspace = filled_vec(segment_size, 0);
	let mut sieve = filled_vec((limit + 1) as usize, 0);

	// Vector which tracks locations of the first multiple of each prime in the next segment.
	let mut offsets = vec![];
	// Iterator for small primes that are not yet tracked
	let mut new_primes = primes.iter().cloned().peekable();

	// Iterate over segments
	let lows = (0..).step_by(segment_size);
	for (low, segment) in lows.zip(sieve.chunks_mut(segment_size)) {

	// Fill default values for this segment, which are equal to the sieve index.
	// These values will be kept by 0, 1, and any primes.
	for j in 0..workspace.len() {
	workspace[j] = (low + j) as Int;
	}
	let high = low + segment.len();

	// Begin tracking the primes whose first composite is in this segment
	while let Some(p) = new_primes.peek().cloned() {
	if p*p >= high { break; } // outside segment

	new_primes.next(); // consume
	offsets.push(p*p - low);
	}

	// Collect the offsets
	let mut it = offsets.iter_mut().zip(primes.iter());


	// A small wheel optimization: Handle p = 2 separately so we can skip
	// even multiples of odd primes.
	if let Some((start2, _)) = it.next() {
	for i in (*start2..workspace.len()).step_by(2) {
	workspace[i] = 2;
	}
	// FIXME is this right? I wrote this a while ago, and looking
	// at it now it would seem that I also intended to reduce start2
	// modulo something.
	// Currently segment size is never odd so there is no impact
	*start2 += (segment_size % 2);
	}

	// Sieve the current segment with primes >= 3.
	// Do them in reverse order so that the smallest ones overwrite larger ones.
	for (start, &p) in make_double_ended(it).rev() {

	// The initial value of `start` is `x - low`, where `x` is the first
	// odd multiple of `p` inside this segment.
	let mut j = *start;
	let step = (2*p) as usize;
	while j < segment_size {
	workspace[j] = p as Int;
	j += step;
	}

	// j is now equal to the offset of the first odd multiple outside this segment.
	// Subtract segment_size to account for the change in offset between main loop iterations.
	// (note: this correctly accounts for the case where p > segment_size)
	*start = j - segment_size;
	}
	copy(&workspace[..], segment);
	}
	sieve
	}

	// helper to identify small primes
	// limit is INCLUSIVE
	fn prime_sieve_simple(limit: usize) -> Vec<bool> {
	let mut sieve = filled_vec(limit + 1, true);
	for i in 2.. {
	if i * i > limit { break }
	if (sieve[i]) {
	for j in (i*i..limit+1).step_by(i) {
	sieve[j] = false;
	}
	}
	}
	sieve
	}

	//----------------------------------------------------

	// for benchmarking against the segmented method
	// limit is EXCLUSIVE
	fn factor_sieve_simple(limit: Int) -> Vec<Int> {
	let mut sieve: Vec<_> = (0..limit).collect();
	for x in (2..limit).step_by(2) { sieve[x as usize] = 2; }
	for x in (3..).step_by(2) {
	let first = x*x;
	if first > limit { break }
	if sieve[x as usize] == x {
	for m in (first..limit).step_by(2*x) {
	if sieve[m as usize] == m {
	sieve[m as usize] = x;
	}
	}
	}
	}
	sieve
	}

	fn make_double_ended<T,I:Iterator<Item=T>>(iter: I) -> ::std::vec::IntoIter<T> {
	iter.collect::<Vec<_>>().into_iter()
	}

	//---------------------------------------

	fn filled_vec<T:Clone>(size: usize, value: T) -> Vec<T> {
	let mut vec = vec![];
	vec.resize(size, value);
	vec
	}

	fn fill<T:Clone>(slice: &mut [T], value: T) {
	for x in slice.iter_mut() { *x = value.clone() }
	}

	fn copy<T:Copy>(source: &[T], dest: &mut [T]) {
	assert!(source.len() >= dest.len());
	for i in 0..dest.len() {
	dest[i] = source[i];
	}
	}
	#[test]
	fn test() {
	// a "definitely correct" sieve formed by trial division.
	// It is large enough to include values for which various certain
	// incorrectly-implemented optimizations could give the wrong prime
	// factor (e.g. a certain easy mistake results in giving 3 for 18,
	// 5 for 45, and 7 for 175; and the first incorrect value may be pushed
	// even further back if a large wheel optimization is used)
	let mut correct = vec![0,1];
	for i in 2..2000 {
	for k in 2..(i+1) {
	if i % k == 0 {
	correct.push(k);
	break;
	}
	}
	}
	for (i,x) in correct.clone().into_iter().enumerate() {
	if x == 3 {
	println!("{}", i);
	}
	}
	// a "definitely definitely correct" sieve to verify that one against,
	// up to a point
	assert_eq!(correct.len(), 2000);
	assert_eq!(&correct[..12], &vec![0,1,2,3,2,5,2,7,2,3,2,11][..]);

	// try various sizes, in particular:
	for len in vec![
	0, 1, // Obvious edge cases (length 0, 1)
	2, 3, // More subtle edge case (no multiples to cross off)
	9, 10, 11, // End in a prime square; a composite; and a prime
	correct.len(), // Catch erroneous values for larger x
	] {
	let actual = FactorSieve::new(len as Int).into_vec();
	assert_eq!(&actual[..], &correct[..len]);
	}
	}

	#[bench]
	fn bench_simple(b: &mut Bencher) {
	b.iter(\|\| {
	let n = 10*SEGMENT_LEN + 20;
	let n = 100_000_000;
	factor_sieve_simple(n as Int).last().cloned()
	});
	}
	#[bench]
	fn bench_segmented(b: &mut Bencher) {
	b.iter(\|\| {
	let n = 10*SEGMENT_LEN + 20; // extra to force an incomplete segment
	let n = 100_000_000;
	FactorSieve::new(n as Int).into_vec().last().cloned()
	});
	}