Skip to content

Instantly share code, notes, and snippets.

@vnayar
Last active October 15, 2018 19:46
Show Gist options
  • Save vnayar/79e2d0a9850833b8859dd9f08997b4d7 to your computer and use it in GitHub Desktop.
Save vnayar/79e2d0a9850833b8859dd9f08997b4d7 to your computer and use it in GitHub Desktop.
/**
* A D port of the Nim program:
* https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e
*
* Related code, papers, and tutorials, are available here:
* https://www.scribd.com/doc/228155369/The-Segmented-Sieve-of-Zakiya-SSoZ
* https://www.scribd.com/document/266461408/Primes-Utils-Handbook
*/
module twinprimes_ssoz;
import std.algorithm : maxElement, min, sort, swap;
import std.array : array;
import std.typecons : Rebindable;
import std.conv : to;
import std.datetime.stopwatch : StopWatch;
import std.math;
import std.numeric : gcd;
import std.parallelism : parallel, totalCPUs;
import std.range : iota;
import std.stdio : readf, write, writeln;
/// Compute modular inverse a^(-1) of a to base b, e.g. a*(a^-1) mod b = 1.
int modInv(int a0, int b0) {
int a = a0;
int b = b0;
int x0 = 0;
int result = 1;
if (b == 1) {
return result;
}
while (a > 1) {
auto q = a / b;
a = a % b;
swap(a, b);
result -= q * x0;
swap(x0, result);
}
if (result < 0) {
result += b0;
}
return result;
}
/// Prime Generator Parameters
struct PgParameters {
uint modulus;
size_t residueCount;
size_t twinPairsCount;
uint[] residues;
uint[] residueTwinPairs;
uint[] residueInverses;
}
/// Creates constant parameters for given PrimeGenerator at compile time.
PgParameters genPgParameters(int prime)() {
pragma(msg, "generating parameters for P", prime);
uint[] primes = [2, 3, 5, 7, 11, 13, 17, 19, 23];
// Compute PG's modulus.
uint modpg = 1;
foreach (prm; primes) {
modpg *= prm;
if (prm == prime)
break;
}
// Generate PG's residue values.
uint[] residues;
uint pc = 5;
uint inc = 2;
while (pc < modpg / 2) {
if (gcd(modpg, pc) == 1) {
residues ~= pc;
residues ~= modpg - pc;
}
pc += inc;
inc = inc ^ 0b110;
}
residues = sort(residues).array;
residues ~= modpg - 1;
residues ~= modpg + 1;
size_t rescnt = residues.length; // PG's residues count
// Extract upper twinpair residues here.
uint[] restwins;
uint j = 0;
while (j < rescnt - 1) {
if (residues[j] + 2 == residues[j + 1]) {
restwins ~= residues[j + 1];
j++;
}
j++;
}
size_t twinpairs = restwins.length; // twinpairs count
// Create PG's residues inverses here.
uint[] inverses;
foreach (res; residues) {
inverses ~= modInv(res, modpg);
}
return PgParameters(modpg, rescnt, twinpairs, residues, restwins, inverses);
}
// Generate at compile time the parameters for PGs P5-P17.
enum parametersp5 = genPgParameters!(5);
enum parametersp7 = genPgParameters!(7);
enum parametersp11 = genPgParameters!(11);
enum parametersp13 = genPgParameters!(13);
// TODO: Discover why this causes compilation errors.
//immutable parametersp17 = genPGparameters!(17);
// Global parameters
shared uint pcnt; // Number of primes from r1..sqrt(N)
shared ulong num; // Adjusted (odd) input value
shared ulong twinscnt; // Number of twinprimes <= N
shared ulong[] primes; // List of primes r1..sqrt(N)
shared uint kb = 0; // Segment size for each seg restrack
shared uint[] cnts; // Hold twinprime counts for seg bytes
shared ulong[] lastwins; // Holds last twin <= num in each thread
shared uint[] pos; // Convert residue val to its residues index val
// faster than `residues.find(residue)
shared PgParameters pgParameters;
shared uint bn; // Segment size factor for PG and input number
/**
* Select at runtime best PG and segment size factor to use for input value.
* These are good estimates derived from PG data profiling. Can be improved.
*/
void selectPg(ulong num) {
if (num < 10_000_000) {
debug writeln("Selecting parametersp5");
pgParameters = parametersp5;
bn = 16;
} else if (num < 1_100_000_000) {
debug writeln("Selecting parametersp7");
pgParameters = parametersp7;
bn = 32;
} else if (num < 35_500_000_000uL) {
debug writeln("Selecting parametersp11");
pgParameters = parametersp11;
bn = 64;
} else /*if (num < 15_000_000_000_000uL)*/ {
debug writeln("Selecting parametersp13");
pgParameters = parametersp13;
if (num > 7_000_000_000_000uL) {
bn = 384;
} else if (num > 2_500_000_000_000uL) {
bn = 320;
} else if (num > 250_000_000_000uL) {
bn = 196;
} else {
bn = 96;
}
}/* else {
// TODO: Fix when parametersp17 is properly computed.
// pgParameters = PgParameters(parametersp17);
pgParameters = parametersp13;
bn = 384;
}*/
cnts = new uint[](pgParameters.twinPairsCount); // twinprime sums for seg bytes
pos = new uint[](pgParameters.modulus); // Create modpg size array to
foreach(i; 0 .. pgParameters.residueCount) {
pos[pgParameters.residues[i] - 2] = cast(uint) i; // Convert residue val -> indx
}
lastwins = new ulong[](pgParameters.twinPairsCount); // Holds last twin per thread.
}
/**
* Use PG to finds primes upto val.
* Compute the list of primes r1..sqrt(input_num), and store in global
* 'primes' array, and store their count in global var 'pcnt'.
* Any algorithm (fast|small) is usable. Here the SoZ for the PG is used.
*/
void sozPg(ulong val) {
uint md = pgParameters.modulus; // PG's modulus value
ulong rscnt = pgParameters.residueCount; // PG's residue count
shared const(uint[]) res = pgParameters.residues; // PG's residues list
ulong num = (val - 1) | 1; // if val even then subtract 1
ulong k = num / md; // compute its residue group value
ulong modk = md * k; // compute the resgroup base value
uint r = 0; // from 1st residue in num's resgroup
while (num >= modk + res[r]) { // find last pc val|position <= num
r += 1;
}
ulong maxpcs = k * rscnt + r; // max number of prime candidates <= num
bool[] prms = new bool[](maxpcs); // array of prime candidates set False
ulong sqrtN = cast(ulong) sqrt(cast(double) num); // compute integer sqrt of input num
// initialize sieve parameters
modk = 0;
r = -1;
k = 0;
// mark the multiples of the primes r1..sqrtN in 'prms'
foreach (prm; prms) { // from list of pc positions in prms
r += 1;
if (r == rscnt) {
r = 0;
modk += md;
k += 1;
}
if (prm)
continue; // If pc not prime go to next location
uint prm_r = res[r]; // if prime save its residue value
ulong prime = modk + prm_r; // numerate the prime value
if (prime > sqrtN)
break; // We're finished if it's > sqrtN
ulong prmstep = prime * rscnt; // prime's stepsize to mark its mults
foreach (ri; res) { // mark prime's multiples in prms
ulong prod = prm_r * ri - 2; // compute cross-product for r|ri pair
// compute resgroup val of 1st prime multiple for each restrack
// starting there, mark all prime multiples on restrack upto end of prms
ulong prm_mult = (k * (prime + ri) + prod / md) * rscnt + pos[prod % md];
while (prm_mult < maxpcs) {
prms[prm_mult] = true;
prm_mult += prmstep;
}
}
}
// prms now contains the nonprime positions for the prime candidates r1..N
// extract primes into global var 'primes' and count into global var 'pcnt'
primes = []; // create empty dynamic array for primes
modk = 0; // initialize loop parameters
r = -1;
foreach (prm; prms) { // numerate|store primes from pcs list
r += 1;
if (r == rscnt) {
r = 0;
modk += md;
}
if (!prm)
primes ~= modk + res[r]; // put prime in global 'primes' list
}
pcnt = cast(uint) primes.length; // set global count of primes
}
/*
proc printprms(Kn: int, Ki: uint64, indx: int, seg: seq[uint8])=
## Print twinprimes for given twinpair for given segment slice.
## Primes will not be displayed in sorted order, collect|sort later for that.
var modk = Ki * modpg.uint # base value of 1st resgroup in slice
let res = restwins[indx] # for upper twinpair residue value
for k in 0..Kn-1: # for each of Kn resgroups in slice
if seg[k].int == 0: # if seg byte for resgroup has twinprime
if modk + res.uint <= num: # and if upper twinprime <= num
echo(modk+res.uint-1) # print twinprime mid val on a line
modk += modpg.uint # set base value for next resgroup
*/
/**
* For 'nextp' array for given twinpair at 'indx' in 'restwins',
* init each col w/1st prime multiple resgroup for the primes r1..sqrt(N).
*/
ulong[] resinit(int indx, ulong[] nextp) {
debug writeln("resinit: indx = ", indx);
debug writeln("pgParameters.residueTwinPairs.length = ", pgParameters.residueTwinPairs.length);
uint res2 = pgParameters.residueTwinPairs[indx]; // upper twinpair value
uint res1 = res2 - 2; // lower twinpair value
uint row1 = 0; // nextp addr for lower|upper twinpair
uint row2 = pcnt;
foreach(size_t j, prime; primes) { // for each primes r1..sqrt(N)
auto k = (prime - 2) / pgParameters.modulus; // find the resgroup it's in
auto r = (prime - 2) % pgParameters.modulus + 2; // and its residue value
auto ri = (res1 * pgParameters.residueInverses[pos[r - 2]] - 2)
% pgParameters.modulus + 2; // compute the ri for r
auto prod = r * ri - 2; // compute residues cross-product
nextp[row1 + j] = k * (prime + ri) + prod / pgParameters.modulus;
// compute the ri for r
ri = (res2 * pgParameters.residueInverses[pos[r - 2]] - 2) % pgParameters.modulus + 2;
prod = r * ri - 2; // compute residues cross-product
nextp[row2 + j] = k * (prime + ri) + prod / pgParameters.modulus;
}
return nextp;
}
// For each twinpair thread find last prime <= num, adjust sum accordingly.
void lastTwinPrime(uint sum, uint Ki, uint Kn, uint indx, ubyte[] seg) {
debug writeln("lastTwinPrime: sum = ", sum, ", Ki = ", Ki, ", Kn = ", Kn, ", indx = ", indx);
uint modk = (Ki + Kn - 1) * pgParameters.modulus; // base val for last resgroup in slice
debug writeln("modk = ", modk);
uint res = pgParameters.residueTwinPairs[indx]; // upper twinpair residue for thread
debug writeln("res = ", res);
foreach (k; 0 .. Kn) { // starting at last resgroup in slice
debug writeln("seg[", Kn - k - 1, "] = ", seg[Kn - k - 1]);
if (seg[Kn - k - 1] == 0) { // if resgroup seg byte has twinpair
uint upper = modk + res; // numerate upper twinpair value
if (upper <= num) {
debug writeln("lastwins[", indx, "] = ", upper);
lastwins[indx] = upper;
break; // if last, store|exit
}
sum--; // else reduce overcount of sum and
}
modk -= pgParameters.modulus; // set base val of next lower resgroup
}
cnts[indx] = sum; // store adjusted sum for twinpair
}
/**
* Perform in a thread, the ssoz for a given twinpair, for Kmax resgroups.
* First create|init 'nextp' array of 1st prime mults for given twinpair, and
* its seg array of KB bytes, which will be gc'd|recovered at end of thread.
* For sieve, mark seg byte to '1' if either twinpair restrack is nonprime,
* for primes mults resgroups, update 'nextp' restrack slices acccordingly.
* Then find last twinprime|sum <= num, store sum in 'cnts' for this twinpair.
* Can optionally compile to print mid twinprime values generated by twinpair.
*/
void twinsSieve(uint Kmax, uint indx) {
debug writeln("twinsSieve: Kmax = ", Kmax, ", indx = ", indx);
uint sum; // init twins cnt|1st resgroup for slice
uint Ki;
uint Kn;
ubyte[] seg = new ubyte[](kb); // create seg byte array for twinpair
ulong[] nextp = new ulong[](pcnt * 2); // create 1st mults array for twinpair
nextp = resinit(indx, nextp); // init w/1st prime mults for twinpair
while (Ki < Kmax) { // for Kn resgroup size slices upto Kmax
Kn = min(kb, Kmax - Ki); // set segment slice resgroup length
debug writeln("Kn = ", Kn);
foreach (b; 0 .. Kn) {
seg[b] = 0; // set all seg restrack bits to prime
}
foreach (j, prime; primes) { // for each prime index r1..sqrt(N)
// for lower twinpair residue track
//debug writeln("j = ", j, ", prime = ", prime);
uint k = nextp[j].to!uint; // starting from this resgroup in 'seg'
while (k < Kn) { // for each primenth byte to end of 'seg'
seg[k] = seg[k] | 1; // mark byte as not a twinprime
k += prime; // compute next prime multiple resgroup
}
nextp[j] = k - Kn; // save 1st resgroup in next eligible seg
// for upper twinpair residue track
k = nextp[pcnt + j].to!uint; // starting from this resgroup in 'seg'
while (k < Kn) { // for each primenth byte to end of 'seg'
seg[k] = seg[k] | 1; // mark byte as not a twinprime
k += prime; // compute next prime multiple resgroup
}
nextp[pcnt + j] = k - Kn; // save 1st resgroup in next eligible seg
}
foreach (k; 0 .. Kn) {
if (seg[k] == 0) {
sum += 1; // sum bytes with twins
}
}
// printprms(Kn, Ki, indx, seg) // display twinprimes for this twinpair
Ki += kb; // set 1st resgroup val of next seg slice
}
Ki -= kb; // set to 1st resgroup of last seg slice
lastTwinPrime(sum, Ki, Kn, indx, seg); // find last twin|count for twin thread
}
/**
* Perform sieve on all segments
* Perform sieve on twinpair residues for segment of Kn resgroups in parallel.
* Then add seg twinprimes count from each twinpair thread to 'twinscnt'.
*/
void segsieve(uint Kmax) {
// for nextp|seg twinpair row indexes
foreach (indx; parallel(iota(0, pgParameters.twinPairsCount))) {
twinsSieve(Kmax, cast(uint) indx); // sieve the selected twinpair restracks
}
foreach (sum; cnts) {
twinscnt = twinscnt + sum; // update 'twinscnt' w/seg twinprime cnts
}
}
/**
* Use selected Pn prime generator for SSoZ
* Main routine to setup, perform, and display results for twinprimes sieve.
*/
void twinPrimesSsoz() {
write("Enter integer number: ");
ulong val;
readf!"%d"(val); // find primes <= val (13..2^64-1)
writeln("\nthreads = ", totalCPUs);
auto stopWatchSetup = StopWatch();
stopWatchSetup.start(); // start timing sieve setup execution
num = val - 1 | 1; // if val even subtract 1
selectPg(num); // select PG and seg factor for input number
auto k = num / pgParameters.modulus; // compute its resgroup
auto modk = pgParameters.modulus * k; // compute its base num
auto Kmax = (num - 2) / pgParameters.modulus + 1; // maximum number of resgroups for num
auto b = bn * 1024; // set seg size to optimize for selected PG
kb = cast(uint)(Kmax < b ? Kmax + 1 : b); // min num of segment resgroups
writeln("each thread segment is [", 1, " x ", kb, "] bytes array");
// This is not necessary for running the program but provides information
// to determine the 'efficiency' of the used PG: (num of primes)/(num of pcs)
// The closer the ratio is to '1' the higher the PG's 'efficiency'.
uint r = 0; // from first residue in last resgroup
while (num >= modk + pgParameters.residueTwinPairs[r]) {
r += 1; // find last tp index <= num
}
auto maxpairs = k * pgParameters.twinPairsCount + r; // maximum number of twinpairs <= num
writeln("twinprime candidates = ", maxpairs, "; resgroups = ", Kmax);
sozPg(cast(ulong) sqrt(cast(double) num)); // compute pcnt and primes <= sqrt(num)
writeln("each ", pgParameters.twinPairsCount, " threads has nextp[", 2, " x ", pcnt, "] array");
stopWatchSetup.stop(); // sieve setup time
writeln("setup time = ", stopWatchSetup.peek);
// 1st 4 tps
if (pgParameters.modulus > 30030) {
twinscnt = 4;
} else if(pgParameters.modulus > 210) {
twinscnt = 3;
} else {
twinscnt = 2;
}
writeln("perform twinprimes ssoz sieve"); // start doing ssoz now
auto stopWatchExecution = StopWatch(); // start timing ssoz sieve execution
stopWatchExecution.start();
segsieve(cast(uint) Kmax); // perform total twinprime ssoz sieve
debug writeln("lastwins = ", lastwins);
ulong ltwin = maxElement(lastwins); // find last twinprime, and its count
auto Kn = Kmax % kb; // set number of resgroups in last slice
if (Kn == 0)
Kn = kb; // if multiple of seg size set to seg size
stopWatchExecution.stop(); // sieve execution time
writeln("sieve time = ", stopWatchExecution.peek());
writeln("last segment = ", Kn, " resgroups; segment slices = ", Kmax / kb + 1);
writeln("total twins = ", twinscnt, "; last twin = ", ltwin - 1, "+/- 1");
writeln("total time = ", stopWatchSetup.peek() + stopWatchExecution.peek());
}
void main()
{
twinPrimesSsoz();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment