Last active
August 12, 2024 16:58
-
-
Save jzakiya/fa76c664c9072ddb51599983be175a3f to your computer and use it in GitHub Desktop.
Twinprimes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in C++.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This C++ source file is a multiple threaded implementation to perform an | |
extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N. | |
Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. | |
Output is the number of twin primes <= N, or in range N1 to N2; the last | |
twin prime value for the range; and the total time of execution. | |
Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu, | |
2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning | |
probably needed to optimize for other hadware systems (ARM, PowerPC, etc). | |
On Linux use -O compiler flag to compile for optimum speed: | |
$ g++ -O3 -fopenmp twinprimes_ssoz.cpp -o twinprimes_ssoz | |
To reduce binary size do: $ strip twinprimes_ssoz | |
Single val: $ echo val1 | ./twinprimes_ssoz | |
Range vals: $ echo val1 val2 | ./twinprimes_ssoz | |
Range values can be provided in either order (larger or smaller first). | |
Mathematical and technical basis for implementation are explained here: | |
https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_ | |
Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ | |
for_the_Riemann_Hypotheses | |
https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ | |
https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained | |
This C++ source code, and updates, can be found here: | |
https://gist.github.com/jzakiya/fa76c664c9072ddb51599983be175a3f | |
This code is provided under the terms of the | |
GNU General Public License Version 3, GPLv3, or greater. | |
License copy/terms are here: http://www.gnu.org/licenses/ | |
Use of this code is free subject to acknowledgment of copyright. | |
Copyright (c) 2017-2024 Jabari Zakiya -- jzakiya at gmail dot com | |
Last update: 2024/08/12 | |
*/ | |
#include <cmath> | |
#include <algorithm> | |
#include <vector> | |
#include <array> | |
#include <tuple> | |
#include <cstdlib> | |
#include <iostream> | |
#include <stdint.h> | |
#include <chrono> | |
#include <omp.h> | |
#include <atomic> | |
using namespace std; | |
typedef uint64_t uint64; | |
// Compute modular inverse a^(-1) of a to base b, e.g. a*(a^-1) mod b = 1. | |
int modinv(int a0, int b0) { | |
int b = b0, a = a0, x = 0; | |
int inv = 1; | |
if (b == 1) return 1; | |
while (a > 1) { | |
int q = a / b; | |
int t = b; b = a % b; a = t; | |
t = x; x = inv - q * x; inv = t; | |
} | |
if (inv < 0) inv += b0; | |
return inv; | |
} | |
// Create prime generator parameters for given Pn | |
tuple<int, int, int, vector<int>, vector<int> > genPgParameters(int prime) { | |
cout << "using Prime Generator parameters for P" << prime << endl; | |
int primes[9] = {2, 3, 5, 7, 11, 13, 17, 19, 23}; | |
int modpg = 1; int res_0 = 0; | |
for (int prm : primes) { res_0 = prm; if (prm > prime) break; modpg *= prm; } | |
vector<int> restwins; // save upper twin pair residues here | |
vector<int> inverses(modpg + 2, 0); // save Pn's residues inverses here | |
int rc = 5; int inc = 2; int res = 0; // use P3's PGS to generate resdidue candidates | |
while (rc < (modpg >> 1)) { // find 1st half of PG's residues | |
if (__gcd(rc, modpg) == 1) { // if rc is a residue | |
int mc = modpg - rc; // create its modular complement | |
inverses[rc] = modinv(rc, modpg); // save rc and mc inverses | |
inverses[mc] = modinv(mc, modpg); // if in twinpair save both hi residues | |
if (res + 2 == rc) { restwins.push_back(rc); restwins.push_back(mc + 2); } | |
res = rc; // save current found residue | |
} | |
rc += inc; inc ^= 0b110; // create next P3 sequence rc: 5 7 11 13 17 19 ... | |
} | |
std::sort(restwins.begin(), restwins.end()); restwins.push_back(modpg + 1); | |
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1; | |
return make_tuple(modpg, res_0, restwins.size(), restwins, inverses); | |
} | |
// Select at runtime best PG and segment size factor to use for input vals. | |
// These are good estimates derived from PG data profiling. Can be improved. | |
tuple<int, int, int, uint64, uint64, uint64, int, vector<int>, | |
vector<int>> setSieveParameters(uint64 start_num, uint64 end_num) { | |
uint64 nrange = end_num - start_num; | |
int Bn; int pg = 3; | |
if (end_num < 49) { | |
Bn = 1; pg = 3; | |
} | |
else if (nrange < 77000000) { | |
Bn = 16; pg = 5; | |
} | |
else if (nrange < 1100000000) { | |
Bn = 32; pg = 7; | |
} | |
else if (nrange < 35500000000) { | |
Bn = 64; pg = 11; | |
} | |
else if (nrange < 14000000000000) { | |
pg = 13; | |
if (nrange > 7000000000000) Bn = 384; | |
else if (nrange > 2500000000000) Bn = 320; | |
else if (nrange > 250000000000) Bn = 196; | |
else Bn = 128; | |
} else { | |
Bn = 384; pg = 17; | |
} | |
int modpg, res_0, pairscnt; vector<int> restwins, resinvrs; | |
tie(modpg, res_0, pairscnt, restwins, resinvrs) = genPgParameters(pg); | |
uint64 kmin = (start_num-2) / modpg+1; // number of resgroups to start_num | |
uint64 kmax = (end_num - 2) / modpg+1; // number of resgroups to end_num | |
uint64 krange = kmax - kmin + 1; // number of resgroups in range, at least 1 | |
uint n = (krange < 37500000000000) ? 10 : (krange < 975000000000000) ? 16 : 20; | |
uint b = Bn * 1024 * n; // set seg size to optimize for selected PG | |
uint ks = (krange < b) ? krange : b; // segments resgroups size | |
cout << "segment size = " << ks << " resgroups; seg array is [1 x " << ((ks-1) >> 6) + 1 << "] 64-bits\n"; | |
uint64 maxpairs = krange * pairscnt; // maximum number of twinprime pcs | |
cout << "twinprime candidates = " << maxpairs << "; resgroups = " << krange << endl; | |
return make_tuple(modpg, res_0, ks, kmin, kmax, krange, pairscnt, restwins, resinvrs); | |
} | |
// Return the primes r0..sqrt(end_num) within range (start_num...end_num) | |
vector<uint64> sozp5(uint64 val, uint res_0, uint64 start_num, uint64 end_num) { | |
uint md = 30; // P5's modulus value | |
int rescnt = 8; // P5's residue count | |
int res[8] = {7,11,13,17,19,23,29,31}; // P5's residues list | |
int bitn[30] = {0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128}; | |
uint range_size = end_num - start_num; // integers size of inputs range | |
uint64 kmax = (val - 2) / md + 1; // number of resgroups upto input value | |
vector<uint8_t> prms(kmax, 0); // byte array of prime candidates init '0' | |
uint sqrtn = (uint) sqrt((double) val);// compute integer sqrt of val | |
uint k = sqrtn/md; // compute its resgroup value | |
uint resk = sqrtn - md*k; int r =0; // compute its residue value; set residue start posn | |
while (resk >= res[r]) ++r; // find largest residue <= sqrtn posn in its resgroup | |
uint pcs_to_sqrtn = k*rescnt + r; // number of pcs <= sqrtn | |
for (uint i = 0; i < pcs_to_sqrtn; ++i) { // for r0..sqrtN primes mark their multiples | |
uint k = i/rescnt; int r = i%rescnt; // update resgroup parameters | |
if ((prms[k] & (1 << r)) != 0) continue; // skip pc if not prime | |
uint prm_r = res[r]; // if prime save its residue value | |
uint64 prime = md*k + prm_r; // numerate its value | |
uint rem = start_num % prime; // prime's modular distance to start_num | |
if (!(prime - rem <= range_size || rem == 0)) continue; // skip prime if no multiple in range | |
for (int ri : res) { // mark prime's multiples in prms | |
uint prod = prm_r * ri - 2; // compute cross-product for prm_r|ri pair | |
uint8_t bit_r = bitn[prod % md]; // bit mask value for prod's residue | |
uint64 kpm = k * (prime + ri) + prod / md; // resgroup for prime's 1st multiple | |
for (; kpm < kmax; kpm += prime) prms[kpm] |= bit_r; // mark prime's multiples | |
} } | |
// prms now contains the prime multiples positions marked for the pcs r0..N | |
// now along each restrack, identify|numerate the primes in each resgroup | |
// return only the primes with a multiple within range (start_num...end_num) | |
vector<uint64> primes; // dynamic array to store primes | |
for (int r = 0; r < rescnt; ++r) { // along each restrack|row til end | |
for (uint64 k = 0; k < kmax; ++k) { // for each resgroup along restrack | |
if ((prms[k] & (1 << r)) == 0) { // if bit location is prime | |
uint64 prime = md * k + res[r]; // numerate its value, store if in range | |
// check if prime has multiple in range, if so keep it, if not don't | |
uint64 rem = start_num % prime; // if rem is 0 then start_num multiple of prime | |
if ((res_0 <= prime && prime <= val) && (prime - rem <= range_size || rem == 0)) primes.push_back(prime); | |
} } } | |
return primes; // primes stored in restrack|row sequence order | |
} | |
// Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'. | |
// Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and | |
// store consecutively as lo_tp|hi_tp pairs for their restracks. | |
vector<uint64> nextp_init(uint rhi, uint64 kmin, uint modpg, vector<uint64> primes, vector<int> resinvrs) { | |
vector<uint64> nextp(primes.size() * 2); // 1st mults array for twinpair | |
uint r_hi = rhi; // upper twinpair residue | |
uint r_lo = rhi - 2; // lower twinpair residue | |
for (int j = 0; j < primes.size(); ++j) { // for each prime r1..sqrt(N) | |
uint prime = primes[j]; // get the prime | |
uint k = (prime - 2) / modpg; // find the resgroup it's in | |
uint r = (prime - 2) % modpg + 2; // and its residue value | |
uint64 r_inv = resinvrs[r]; // and its residue inverse | |
uint64 rl = (r_inv * r_lo - 2) % modpg + 2; // compute the ri for r for lo_tp | |
uint64 rh = (r_inv * r_hi - 2) % modpg + 2; // compute the ri for r for hi_tp | |
uint64 kl = k * (prime + rl) + (r * rl - 2) / modpg; // kl 1st mult restroup | |
uint64 kh = k * (prime + rh) + (r * rh - 2) / modpg; // kh 1st mult restroup | |
if (kl < kmin) { kl = (kmin - kl) % prime; if (kl > 0) kl = prime - kl; } else { kl -= kmin; } | |
if (kh < kmin) { kh = (kmin - kh) % prime; if (kh > 0) kh = prime - kh; } else { kh -= kmin; } | |
nextp[j << 1] = kl; // prime's 1st mult lo_tp resgroups in range | |
nextp[j << 1 | 1] = kh; // prime's 1st mult hi_tp resgroups in range | |
} | |
return nextp; | |
} | |
// Perform in thread the ssoz for given twinpair residues for kmax resgroups. | |
// First create|init 'nextp' array of 1st prime mults for given twinpair, | |
// stored consequtively in 'nextp', and init seg array for ks resgroups. | |
// For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime | |
// for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
// Return the last twinprime|sum for the range for this twinpair residues | |
tuple<uint64, uint64> twins_sieve(uint r_hi, uint64 kmin, uint64 kmax, uint ks, uint64 start_num, | |
uint64 end_num, uint modpg, vector<uint64> primes, vector<int>resinvrs) { | |
uint s = 6; // shift value for 64 bits | |
uint bmask = (1 << s) - 1; // bitmask val for 64 bits | |
uint64 sum = 0; // init tp cnt for this twin pair | |
uint64 ki = kmin - 1; // 1st resgroup seg val for each slice | |
uint kn = ks; // segment resgroup size | |
uint64 hi_tp = 0; // init value of largest tp in segment | |
uint64 k_max = kmax; // max number of resgroups for segment | |
vector<uint64> seg(((ks-1) >> s) + 1); // seg array for ks resgroups | |
if (r_hi - 2 < (start_num - 2) % modpg + 2) ++ki; // ensure lo tp in range | |
if (r_hi > (end_num -2) % modpg + 2) --k_max; // ensure hi tp in range | |
vector<uint64> nextp = nextp_init(r_hi, ki, modpg, primes, resinvrs); // 1st prime mults | |
while (ki < k_max) { // for ks resgroup size slices upto kmax | |
if (ks > (k_max - ki)) kn = (k_max - ki); // adjust kn for last seg size | |
for (int j = 0; j < primes.size(); ++j) { // for each prime index r1..sqrt(N) | |
uint prime = primes[j]; // for this prime | |
// for lower pair residue track | |
uint64 k1 = nextp[j << 1]; // starting from this lower resgroup in seg | |
for (; k1 < kn; k1 += prime) // mark primenth resgrouup bits prime mults | |
seg[k1 >> s] |= uint64(1) << (k1 & bmask); | |
nextp[j << 1] = k1 - kn; // save 1st lower resgroup in next eligible seg | |
// for upper pair residue track | |
uint64 k2 = nextp[j << 1 | 1]; // starting from this upper resgroup in seg | |
for (; k2 < kn; k2 += prime) // mark primenth resgrouup bits prime mults | |
seg[k2 >> s] |= uint64(1) << (k2 & bmask); | |
nextp[j << 1 | 1] = k2 - kn; // save 1st upper resgroup in next eligible seg | |
} // set as nonprime unused bits in last seg[n] | |
// so fast, do for every seg[i] | |
seg[(kn-1) >> s] |= ~uint64(1) << ((kn-1) & bmask); | |
uint cnt = 0; // count the twinprimes in the segment | |
for (int k = 0; k <= (kn-1) >> s; ++k) cnt += __builtin_popcountl(~seg[k]); | |
if (cnt > 0) { // if segment has twin primes | |
sum += cnt; // add the segment count to total count | |
uint upk = kn - 1; // from end of seg count backwards to largest tp | |
while ((seg[upk >> s] & (uint64(1) << (upk & bmask))) != 0) upk--; | |
hi_tp = ki + upk; // numerate its full resgroup value | |
} | |
ki += ks; // set 1st resgroup val of next seg slice | |
if (ki < k_max) {for (int b = 0; b <= ((kn-1) >> s); ++b) seg[b] = 0;} // set all seg bits prime | |
} | |
hi_tp = ((r_hi > end_num) || (sum == 0)) ? 1 : hi_tp * modpg + r_hi; | |
return make_tuple(hi_tp, sum); | |
} | |
int main() { | |
vector<uint64> x; | |
uint64 num; | |
while ( cin >> num ) x.push_back(num); | |
uint64 end_num = (x[0] > 3) ? x[0] : 3; | |
uint64 start_num = (x[1] > 3) ? x[1] : 3; | |
if (start_num > end_num) swap(start_num, end_num); | |
start_num |= 1; // if start_num even increase by 1 | |
end_num = (end_num - 1) | 1; // if end_num even decrease by 1 | |
if (end_num - start_num < 2) { start_num = 7; end_num = 7; } | |
cout << "threads = " << omp_get_max_threads() << endl; | |
auto ts = chrono::system_clock::now(); // start timing sieve setup execution | |
int modpg, res_0, pairscnt; vector<int> restwins, resinvrs; | |
uint64 ks, kmin, kmax, krange; | |
tie(modpg, res_0, ks, kmin, kmax, krange, pairscnt, restwins, resinvrs) = setSieveParameters(start_num, end_num); | |
// create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range | |
vector<uint64> primes; | |
(end_num < 49) ? primes = {5} : primes = sozp5((uint64) sqrt((double) end_num), res_0, start_num, end_num); | |
cout << "each of "<< pairscnt << " threads has nextp["<< 2 << " x " <<primes.size() << "] array\n"; | |
uint64 twinscnt = 0; // number of twin primes in range | |
int lo_range = restwins[0] - 3; // lo_range = lo_tp - 1 | |
for (int tp : {3, 5, 11, 17}) { // lo tps for available Pn's | |
if (end_num == 3) break; // if 3 end of range, no twin primes | |
if (start_num <= tp && tp < lo_range) twinscnt++; // cnt small tps if any | |
} | |
auto te = chrono::system_clock::now() - ts; // sieve setup time | |
chrono::duration<double> seconds = te; | |
cout << "setup time = " << seconds.count() << " secs\n"; | |
cout << "perform twinprimes ssoz sieve\n"; | |
auto t1 = chrono::system_clock::now(); // start timing ssoz sieve execution | |
vector<uint64> cnts(pairscnt); // twins cnts for each twinpair | |
vector<uint64> lastwins(pairscnt); // last|largest twin per twinpair | |
std::atomic<uint> threadscnt(0); // count of finished threads | |
#pragma omp parallel for // process twinpairs in parrallel | |
for (int i = 0; i < pairscnt; ++i) { // process each twinpair in own thread | |
uint64 l, c; | |
tie(l, c) = twins_sieve(restwins[i], kmin, kmax, ks, start_num, end_num, modpg, primes, resinvrs); | |
lastwins[i] = l; cnts[i] = c; | |
cout << "\r" << threadscnt++ << " of " << pairscnt << " twinpairs done"; | |
} | |
cout << "\r" << pairscnt << " of " << pairscnt << " twinpairs done"; | |
uint64 last_twin = 0; // init val for largest twinprime | |
for (int i = 0; i < pairscnt; ++i) { // find largest twinprime|sum in range | |
twinscnt += cnts[i]; | |
if (last_twin < lastwins[i]) last_twin = lastwins[i]; | |
} | |
if ((end_num == 5) && (twinscnt == 1)) last_twin = 5; | |
uint kn = krange % ks; | |
if (kn == 0) kn = ks; | |
auto t2 = chrono::system_clock::now() - t1; | |
chrono::duration<double> secs1 = t2; | |
chrono::duration<double> secs2 = t2 + te; | |
cout << "\nsieve time = " << secs1.count() << " secs\n"; | |
cout << "total time = " << secs2.count() << " secs\n"; | |
cout << "last segment = "<<kn<<" resgroups; segment slices = "<< (krange-1)/ks + 1<<"\n"; | |
cout << "total twins = " << twinscnt << "; last twin = " << (last_twin - 1) << "+/-1"<< endl; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment