Last active
October 15, 2018 19:46
-
-
Save vnayar/79e2d0a9850833b8859dd9f08997b4d7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* A D port of the Nim program: | |
* https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e | |
* | |
* Related code, papers, and tutorials, are available here: | |
* https://www.scribd.com/doc/228155369/The-Segmented-Sieve-of-Zakiya-SSoZ | |
* https://www.scribd.com/document/266461408/Primes-Utils-Handbook | |
*/ | |
module twinprimes_ssoz; | |
import std.algorithm : maxElement, min, sort, swap; | |
import std.array : array; | |
import std.typecons : Rebindable; | |
import std.conv : to; | |
import std.datetime.stopwatch : StopWatch; | |
import std.math; | |
import std.numeric : gcd; | |
import std.parallelism : parallel, totalCPUs; | |
import std.range : iota; | |
import std.stdio : readf, write, writeln; | |
/// Compute modular inverse a^(-1) of a to base b, e.g. a*(a^-1) mod b = 1. | |
int modInv(int a0, int b0) { | |
int a = a0; | |
int b = b0; | |
int x0 = 0; | |
int result = 1; | |
if (b == 1) { | |
return result; | |
} | |
while (a > 1) { | |
auto q = a / b; | |
a = a % b; | |
swap(a, b); | |
result -= q * x0; | |
swap(x0, result); | |
} | |
if (result < 0) { | |
result += b0; | |
} | |
return result; | |
} | |
/// Prime Generator Parameters | |
struct PgParameters { | |
uint modulus; | |
size_t residueCount; | |
size_t twinPairsCount; | |
uint[] residues; | |
uint[] residueTwinPairs; | |
uint[] residueInverses; | |
} | |
/// Creates constant parameters for given PrimeGenerator at compile time. | |
PgParameters genPgParameters(int prime)() { | |
pragma(msg, "generating parameters for P", prime); | |
uint[] primes = [2, 3, 5, 7, 11, 13, 17, 19, 23]; | |
// Compute PG's modulus. | |
uint modpg = 1; | |
foreach (prm; primes) { | |
modpg *= prm; | |
if (prm == prime) | |
break; | |
} | |
// Generate PG's residue values. | |
uint[] residues; | |
uint pc = 5; | |
uint inc = 2; | |
while (pc < modpg / 2) { | |
if (gcd(modpg, pc) == 1) { | |
residues ~= pc; | |
residues ~= modpg - pc; | |
} | |
pc += inc; | |
inc = inc ^ 0b110; | |
} | |
residues = sort(residues).array; | |
residues ~= modpg - 1; | |
residues ~= modpg + 1; | |
size_t rescnt = residues.length; // PG's residues count | |
// Extract upper twinpair residues here. | |
uint[] restwins; | |
uint j = 0; | |
while (j < rescnt - 1) { | |
if (residues[j] + 2 == residues[j + 1]) { | |
restwins ~= residues[j + 1]; | |
j++; | |
} | |
j++; | |
} | |
size_t twinpairs = restwins.length; // twinpairs count | |
// Create PG's residues inverses here. | |
uint[] inverses; | |
foreach (res; residues) { | |
inverses ~= modInv(res, modpg); | |
} | |
return PgParameters(modpg, rescnt, twinpairs, residues, restwins, inverses); | |
} | |
// Generate at compile time the parameters for PGs P5-P17. | |
enum parametersp5 = genPgParameters!(5); | |
enum parametersp7 = genPgParameters!(7); | |
enum parametersp11 = genPgParameters!(11); | |
enum parametersp13 = genPgParameters!(13); | |
// TODO: Discover why this causes compilation errors. | |
//immutable parametersp17 = genPGparameters!(17); | |
// Global parameters | |
shared uint pcnt; // Number of primes from r1..sqrt(N) | |
shared ulong num; // Adjusted (odd) input value | |
shared ulong twinscnt; // Number of twinprimes <= N | |
shared ulong[] primes; // List of primes r1..sqrt(N) | |
shared uint kb = 0; // Segment size for each seg restrack | |
shared uint[] cnts; // Hold twinprime counts for seg bytes | |
shared ulong[] lastwins; // Holds last twin <= num in each thread | |
shared uint[] pos; // Convert residue val to its residues index val | |
// faster than `residues.find(residue) | |
shared PgParameters pgParameters; | |
shared uint bn; // Segment size factor for PG and input number | |
/** | |
* Select at runtime best PG and segment size factor to use for input value. | |
* These are good estimates derived from PG data profiling. Can be improved. | |
*/ | |
void selectPg(ulong num) { | |
if (num < 10_000_000) { | |
debug writeln("Selecting parametersp5"); | |
pgParameters = parametersp5; | |
bn = 16; | |
} else if (num < 1_100_000_000) { | |
debug writeln("Selecting parametersp7"); | |
pgParameters = parametersp7; | |
bn = 32; | |
} else if (num < 35_500_000_000uL) { | |
debug writeln("Selecting parametersp11"); | |
pgParameters = parametersp11; | |
bn = 64; | |
} else /*if (num < 15_000_000_000_000uL)*/ { | |
debug writeln("Selecting parametersp13"); | |
pgParameters = parametersp13; | |
if (num > 7_000_000_000_000uL) { | |
bn = 384; | |
} else if (num > 2_500_000_000_000uL) { | |
bn = 320; | |
} else if (num > 250_000_000_000uL) { | |
bn = 196; | |
} else { | |
bn = 96; | |
} | |
}/* else { | |
// TODO: Fix when parametersp17 is properly computed. | |
// pgParameters = PgParameters(parametersp17); | |
pgParameters = parametersp13; | |
bn = 384; | |
}*/ | |
cnts = new uint[](pgParameters.twinPairsCount); // twinprime sums for seg bytes | |
pos = new uint[](pgParameters.modulus); // Create modpg size array to | |
foreach(i; 0 .. pgParameters.residueCount) { | |
pos[pgParameters.residues[i] - 2] = cast(uint) i; // Convert residue val -> indx | |
} | |
lastwins = new ulong[](pgParameters.twinPairsCount); // Holds last twin per thread. | |
} | |
/** | |
* Use PG to finds primes upto val. | |
* Compute the list of primes r1..sqrt(input_num), and store in global | |
* 'primes' array, and store their count in global var 'pcnt'. | |
* Any algorithm (fast|small) is usable. Here the SoZ for the PG is used. | |
*/ | |
void sozPg(ulong val) { | |
uint md = pgParameters.modulus; // PG's modulus value | |
ulong rscnt = pgParameters.residueCount; // PG's residue count | |
shared const(uint[]) res = pgParameters.residues; // PG's residues list | |
ulong num = (val - 1) | 1; // if val even then subtract 1 | |
ulong k = num / md; // compute its residue group value | |
ulong modk = md * k; // compute the resgroup base value | |
uint r = 0; // from 1st residue in num's resgroup | |
while (num >= modk + res[r]) { // find last pc val|position <= num | |
r += 1; | |
} | |
ulong maxpcs = k * rscnt + r; // max number of prime candidates <= num | |
bool[] prms = new bool[](maxpcs); // array of prime candidates set False | |
ulong sqrtN = cast(ulong) sqrt(cast(double) num); // compute integer sqrt of input num | |
// initialize sieve parameters | |
modk = 0; | |
r = -1; | |
k = 0; | |
// mark the multiples of the primes r1..sqrtN in 'prms' | |
foreach (prm; prms) { // from list of pc positions in prms | |
r += 1; | |
if (r == rscnt) { | |
r = 0; | |
modk += md; | |
k += 1; | |
} | |
if (prm) | |
continue; // If pc not prime go to next location | |
uint prm_r = res[r]; // if prime save its residue value | |
ulong prime = modk + prm_r; // numerate the prime value | |
if (prime > sqrtN) | |
break; // We're finished if it's > sqrtN | |
ulong prmstep = prime * rscnt; // prime's stepsize to mark its mults | |
foreach (ri; res) { // mark prime's multiples in prms | |
ulong prod = prm_r * ri - 2; // compute cross-product for r|ri pair | |
// compute resgroup val of 1st prime multiple for each restrack | |
// starting there, mark all prime multiples on restrack upto end of prms | |
ulong prm_mult = (k * (prime + ri) + prod / md) * rscnt + pos[prod % md]; | |
while (prm_mult < maxpcs) { | |
prms[prm_mult] = true; | |
prm_mult += prmstep; | |
} | |
} | |
} | |
// prms now contains the nonprime positions for the prime candidates r1..N | |
// extract primes into global var 'primes' and count into global var 'pcnt' | |
primes = []; // create empty dynamic array for primes | |
modk = 0; // initialize loop parameters | |
r = -1; | |
foreach (prm; prms) { // numerate|store primes from pcs list | |
r += 1; | |
if (r == rscnt) { | |
r = 0; | |
modk += md; | |
} | |
if (!prm) | |
primes ~= modk + res[r]; // put prime in global 'primes' list | |
} | |
pcnt = cast(uint) primes.length; // set global count of primes | |
} | |
/* | |
proc printprms(Kn: int, Ki: uint64, indx: int, seg: seq[uint8])= | |
## Print twinprimes for given twinpair for given segment slice. | |
## Primes will not be displayed in sorted order, collect|sort later for that. | |
var modk = Ki * modpg.uint # base value of 1st resgroup in slice | |
let res = restwins[indx] # for upper twinpair residue value | |
for k in 0..Kn-1: # for each of Kn resgroups in slice | |
if seg[k].int == 0: # if seg byte for resgroup has twinprime | |
if modk + res.uint <= num: # and if upper twinprime <= num | |
echo(modk+res.uint-1) # print twinprime mid val on a line | |
modk += modpg.uint # set base value for next resgroup | |
*/ | |
/** | |
* For 'nextp' array for given twinpair at 'indx' in 'restwins', | |
* init each col w/1st prime multiple resgroup for the primes r1..sqrt(N). | |
*/ | |
ulong[] resinit(int indx, ulong[] nextp) { | |
debug writeln("resinit: indx = ", indx); | |
debug writeln("pgParameters.residueTwinPairs.length = ", pgParameters.residueTwinPairs.length); | |
uint res2 = pgParameters.residueTwinPairs[indx]; // upper twinpair value | |
uint res1 = res2 - 2; // lower twinpair value | |
uint row1 = 0; // nextp addr for lower|upper twinpair | |
uint row2 = pcnt; | |
foreach(size_t j, prime; primes) { // for each primes r1..sqrt(N) | |
auto k = (prime - 2) / pgParameters.modulus; // find the resgroup it's in | |
auto r = (prime - 2) % pgParameters.modulus + 2; // and its residue value | |
auto ri = (res1 * pgParameters.residueInverses[pos[r - 2]] - 2) | |
% pgParameters.modulus + 2; // compute the ri for r | |
auto prod = r * ri - 2; // compute residues cross-product | |
nextp[row1 + j] = k * (prime + ri) + prod / pgParameters.modulus; | |
// compute the ri for r | |
ri = (res2 * pgParameters.residueInverses[pos[r - 2]] - 2) % pgParameters.modulus + 2; | |
prod = r * ri - 2; // compute residues cross-product | |
nextp[row2 + j] = k * (prime + ri) + prod / pgParameters.modulus; | |
} | |
return nextp; | |
} | |
// For each twinpair thread find last prime <= num, adjust sum accordingly. | |
void lastTwinPrime(uint sum, uint Ki, uint Kn, uint indx, ubyte[] seg) { | |
debug writeln("lastTwinPrime: sum = ", sum, ", Ki = ", Ki, ", Kn = ", Kn, ", indx = ", indx); | |
uint modk = (Ki + Kn - 1) * pgParameters.modulus; // base val for last resgroup in slice | |
debug writeln("modk = ", modk); | |
uint res = pgParameters.residueTwinPairs[indx]; // upper twinpair residue for thread | |
debug writeln("res = ", res); | |
foreach (k; 0 .. Kn) { // starting at last resgroup in slice | |
debug writeln("seg[", Kn - k - 1, "] = ", seg[Kn - k - 1]); | |
if (seg[Kn - k - 1] == 0) { // if resgroup seg byte has twinpair | |
uint upper = modk + res; // numerate upper twinpair value | |
if (upper <= num) { | |
debug writeln("lastwins[", indx, "] = ", upper); | |
lastwins[indx] = upper; | |
break; // if last, store|exit | |
} | |
sum--; // else reduce overcount of sum and | |
} | |
modk -= pgParameters.modulus; // set base val of next lower resgroup | |
} | |
cnts[indx] = sum; // store adjusted sum for twinpair | |
} | |
/** | |
* Perform in a thread, the ssoz for a given twinpair, for Kmax resgroups. | |
* First create|init 'nextp' array of 1st prime mults for given twinpair, and | |
* its seg array of KB bytes, which will be gc'd|recovered at end of thread. | |
* For sieve, mark seg byte to '1' if either twinpair restrack is nonprime, | |
* for primes mults resgroups, update 'nextp' restrack slices acccordingly. | |
* Then find last twinprime|sum <= num, store sum in 'cnts' for this twinpair. | |
* Can optionally compile to print mid twinprime values generated by twinpair. | |
*/ | |
void twinsSieve(uint Kmax, uint indx) { | |
debug writeln("twinsSieve: Kmax = ", Kmax, ", indx = ", indx); | |
uint sum; // init twins cnt|1st resgroup for slice | |
uint Ki; | |
uint Kn; | |
ubyte[] seg = new ubyte[](kb); // create seg byte array for twinpair | |
ulong[] nextp = new ulong[](pcnt * 2); // create 1st mults array for twinpair | |
nextp = resinit(indx, nextp); // init w/1st prime mults for twinpair | |
while (Ki < Kmax) { // for Kn resgroup size slices upto Kmax | |
Kn = min(kb, Kmax - Ki); // set segment slice resgroup length | |
debug writeln("Kn = ", Kn); | |
foreach (b; 0 .. Kn) { | |
seg[b] = 0; // set all seg restrack bits to prime | |
} | |
foreach (j, prime; primes) { // for each prime index r1..sqrt(N) | |
// for lower twinpair residue track | |
//debug writeln("j = ", j, ", prime = ", prime); | |
uint k = nextp[j].to!uint; // starting from this resgroup in 'seg' | |
while (k < Kn) { // for each primenth byte to end of 'seg' | |
seg[k] = seg[k] | 1; // mark byte as not a twinprime | |
k += prime; // compute next prime multiple resgroup | |
} | |
nextp[j] = k - Kn; // save 1st resgroup in next eligible seg | |
// for upper twinpair residue track | |
k = nextp[pcnt + j].to!uint; // starting from this resgroup in 'seg' | |
while (k < Kn) { // for each primenth byte to end of 'seg' | |
seg[k] = seg[k] | 1; // mark byte as not a twinprime | |
k += prime; // compute next prime multiple resgroup | |
} | |
nextp[pcnt + j] = k - Kn; // save 1st resgroup in next eligible seg | |
} | |
foreach (k; 0 .. Kn) { | |
if (seg[k] == 0) { | |
sum += 1; // sum bytes with twins | |
} | |
} | |
// printprms(Kn, Ki, indx, seg) // display twinprimes for this twinpair | |
Ki += kb; // set 1st resgroup val of next seg slice | |
} | |
Ki -= kb; // set to 1st resgroup of last seg slice | |
lastTwinPrime(sum, Ki, Kn, indx, seg); // find last twin|count for twin thread | |
} | |
/** | |
* Perform sieve on all segments | |
* Perform sieve on twinpair residues for segment of Kn resgroups in parallel. | |
* Then add seg twinprimes count from each twinpair thread to 'twinscnt'. | |
*/ | |
void segsieve(uint Kmax) { | |
// for nextp|seg twinpair row indexes | |
foreach (indx; parallel(iota(0, pgParameters.twinPairsCount))) { | |
twinsSieve(Kmax, cast(uint) indx); // sieve the selected twinpair restracks | |
} | |
foreach (sum; cnts) { | |
twinscnt = twinscnt + sum; // update 'twinscnt' w/seg twinprime cnts | |
} | |
} | |
/** | |
* Use selected Pn prime generator for SSoZ | |
* Main routine to setup, perform, and display results for twinprimes sieve. | |
*/ | |
void twinPrimesSsoz() { | |
write("Enter integer number: "); | |
ulong val; | |
readf!"%d"(val); // find primes <= val (13..2^64-1) | |
writeln("\nthreads = ", totalCPUs); | |
auto stopWatchSetup = StopWatch(); | |
stopWatchSetup.start(); // start timing sieve setup execution | |
num = val - 1 | 1; // if val even subtract 1 | |
selectPg(num); // select PG and seg factor for input number | |
auto k = num / pgParameters.modulus; // compute its resgroup | |
auto modk = pgParameters.modulus * k; // compute its base num | |
auto Kmax = (num - 2) / pgParameters.modulus + 1; // maximum number of resgroups for num | |
auto b = bn * 1024; // set seg size to optimize for selected PG | |
kb = cast(uint)(Kmax < b ? Kmax + 1 : b); // min num of segment resgroups | |
writeln("each thread segment is [", 1, " x ", kb, "] bytes array"); | |
// This is not necessary for running the program but provides information | |
// to determine the 'efficiency' of the used PG: (num of primes)/(num of pcs) | |
// The closer the ratio is to '1' the higher the PG's 'efficiency'. | |
uint r = 0; // from first residue in last resgroup | |
while (num >= modk + pgParameters.residueTwinPairs[r]) { | |
r += 1; // find last tp index <= num | |
} | |
auto maxpairs = k * pgParameters.twinPairsCount + r; // maximum number of twinpairs <= num | |
writeln("twinprime candidates = ", maxpairs, "; resgroups = ", Kmax); | |
sozPg(cast(ulong) sqrt(cast(double) num)); // compute pcnt and primes <= sqrt(num) | |
writeln("each ", pgParameters.twinPairsCount, " threads has nextp[", 2, " x ", pcnt, "] array"); | |
stopWatchSetup.stop(); // sieve setup time | |
writeln("setup time = ", stopWatchSetup.peek); | |
// 1st 4 tps | |
if (pgParameters.modulus > 30030) { | |
twinscnt = 4; | |
} else if(pgParameters.modulus > 210) { | |
twinscnt = 3; | |
} else { | |
twinscnt = 2; | |
} | |
writeln("perform twinprimes ssoz sieve"); // start doing ssoz now | |
auto stopWatchExecution = StopWatch(); // start timing ssoz sieve execution | |
stopWatchExecution.start(); | |
segsieve(cast(uint) Kmax); // perform total twinprime ssoz sieve | |
debug writeln("lastwins = ", lastwins); | |
ulong ltwin = maxElement(lastwins); // find last twinprime, and its count | |
auto Kn = Kmax % kb; // set number of resgroups in last slice | |
if (Kn == 0) | |
Kn = kb; // if multiple of seg size set to seg size | |
stopWatchExecution.stop(); // sieve execution time | |
writeln("sieve time = ", stopWatchExecution.peek()); | |
writeln("last segment = ", Kn, " resgroups; segment slices = ", Kmax / kb + 1); | |
writeln("total twins = ", twinscnt, "; last twin = ", ltwin - 1, "+/- 1"); | |
writeln("total time = ", stopWatchSetup.peek() + stopWatchExecution.peek()); | |
} | |
void main() | |
{ | |
twinPrimesSsoz(); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment