-
-
Save Pascal66/e25769c1248325d45e7cdbcec25d53f2 to your computer and use it in GitHub Desktop.
Twinprimes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Nim
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[ | |
This Nim source file is a multiple threaded implementation to perform an | |
extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N. | |
Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. | |
Output is the number of twin primes <= N, or in range N1 to N2; the last | |
twin prime value for the range; and the total time of execution. | |
This code was developed on a System76 laptop with an Intel I7 6700HQ cpu, | |
2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning | |
would be needed to optimize for other hardware systems (ARM, PowerPC, etc). | |
To compile for nim versions <= 0.19.x (can use --cc:gcc or --cc:clang) do: | |
$ nim c --cc:gcc --d:release --threads:on twinprimes_ssoz.nim | |
For Nim >= 0.20.0|1.0-rc1 compile as: | |
$ nim c --cc:gcc --d:danger --threads:on twinprimes_ssoz.nim | |
Then run executable: $ ./twinprimes_ssoz <cr>, and enter range values. | |
Or alternatively: $ echo <range1> <range2> | ./twinprimes_ssoz | |
Range values can be provided in either order (larger or smaller first). | |
This source file, and updates, will be available here: | |
https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e | |
Mathematical and technical basis for implementation are explained here: | |
https://www.scribd.com/document/395415391/The-Use-of-Prime-Generators-to- | |
Implement-Fast-Twin-Primes-Sieve-Of-Zakiya-SoZ-Applications-to-Number-Theory- | |
and-Implications-for-the-Riemann-Hypoth | |
https://www.scribd.com/doc/228155369/The-Segmented-Sieve-of-Zakiya-SSoZ | |
https://www.scribd.com/document/266461408/Primes-Utils-Handbook | |
This code is provided free and subject to copyright and terms of the | |
GNU General Public License Version 3, GPLv3, or greater. | |
License copy/terms are here: http://www.gnu.org/licenses/ | |
Copyright (c) 2017-19 Jabari Zakiya -- jzakiya at gmail dot com | |
Version Date: 2019/9/4 | |
]# | |
import math # for sqrt, gcd, mod functions | |
import strutils, typetraits # for number input | |
import times, os # for timing code execution | |
import osproc # for getting threads count | |
import threadpool # for parallel processing | |
import algorithm # for sort | |
{.experimental.} # required to use 'parallel' (<= 0.20.x) | |
# Global array used to count number of primes in each 'seg' byte. | |
# Each value is number of '0' bits (primes) for values 0..255. | |
let pbits = [ | |
8,7,7,6,7,6,6,5,7,6,6,5,6,5,5,4,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3 | |
,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2 | |
,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2 | |
,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1 | |
,7,6,6,5,6,5,5,4,6,5,5,4,5,4,4,3,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2 | |
,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1 | |
,6,5,5,4,5,4,4,3,5,4,4,3,4,3,3,2,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1 | |
,5,4,4,3,4,3,3,2,4,3,3,2,3,2,2,1,4,3,3,2,3,2,2,1,3,2,2,1,2,1,1,0] | |
proc modinv(a0, b0: int): int = | |
## Compute modular inverse a^-1 of a to base b, e.g. a*(a^-1) mod b = 1. | |
var (a, b, x0) = (a0, b0, 0) | |
result = 1 | |
if b == 1: return | |
while a > 1: | |
result -= (a div b) * x0 | |
a = a mod b | |
swap a, b | |
swap x0, result | |
if result < 0: result += b0 | |
proc genPGparameters(prime: int): (int, int, seq[int], seq[int]) = | |
## Create constant parameters for given PG at compile time. | |
#echo("generating parameters for P", prime) | |
let primes = [2, 3, 5, 7, 11, 13, 17, 19, 23] | |
var (modpg, res_0) = (1, 0) # compute PG's modulus and res_0 value | |
for prm in primes: (res_0 = prm; if prm > prime: break; modpg *= prm) | |
var restwins: seq[int] = @[] # save upper twin pair residues here | |
var inverses = newSeq[int](modpg) # save PG's residues inverses here | |
var (pc, inc, res) = (5, 2, 0) # use P3's PGS to generate pcs | |
while pc < modpg div 2: # find a residue, then modular complement | |
if gcd(modpg, pc) == 1: # if pc a residue | |
let pc_mc = modpg - pc # create its modular complement | |
var inv_r = modinv(pc, modpg) # compute residues's inverse | |
inverses[pc - 2] = inv_r # save its inverse | |
inverses[inv_r - 2] = pc # save its inverse inverse | |
inv_r = modinv(pc_mc, modpg) # compute residues's complement inverse | |
inverses[pc_mc - 2] = inv_r # save its inverse | |
inverses[inv_r - 2] = pc_mc # save its inverse inverse | |
if res + 2 == pc: restwins.add(pc); restwins.add(pc_mc + 2) # save hi_tp residues | |
res = pc | |
pc += inc; inc = inc xor 0b110 | |
restwins.sort(system.cmp[int]); restwins.add(modpg + 1) # last residue is last hi_tp | |
inverses[modpg - 1] = 1; inverses[modpg - 3] = modpg - 1 # last 2 resdiues are self inverses | |
result = (modpg, res_0, restwins, inverses) | |
# Global parameters | |
var | |
KB = 0 # segment size for each seg restrack | |
start_num: uint # lo number for range | |
end_num: uint # hi number for range | |
Kmax: uint # number of resgroups to end_num | |
Kmin: uint # number of resgroups to start_num | |
primes: seq[int] # list of primes r1..sqrt(N) | |
cnts: seq[uint] # hold twin primes counts for seg bytes | |
lastwins: seq[uint] # holds largest twin prime <= num in each thread | |
modpg: int # PG's modulus value | |
res_0: int # PG's first residue value | |
restwins: seq[int] # PG's list of twinpair residues | |
resinvrs: seq[int] # PG's list of residues inverses | |
Bn: int # segment size factor for PG and input number | |
s: int # 0|3 for 1|8 resgroups/byte for 'small|large' ranges | |
proc selectPG(start_range, end_range: uint) = | |
## Select at runtime best PG and segment size factor to use for input value. | |
## These are good estimates derived from PG data profiling. Can be improved. | |
let range = end_range - start_range | |
var pg = 3 | |
if end_range < 49'u: | |
Bn = 1; pg = 3 | |
elif range < 10_000_000: | |
Bn = 16; pg = 5 | |
elif range < 1_100_000_000'u: | |
Bn = 32; pg = 7 | |
elif range < 35_500_000_000'u: | |
Bn = 64; pg = 11 | |
elif range < 15_000_000_000_000'u: | |
pg = 13 | |
if range > 7_000_000_000_000'u: Bn = 384 | |
elif range > 2_500_000_000_000'u: Bn = 320 | |
elif range > 250_000_000_000'u: Bn = 196 | |
else: Bn = 128 #96 | |
else: | |
Bn = 384; pg = 17 | |
# Set value for 'small' or 'large' ranges to opt sieving | |
s = if range < 100_000_000_000'u: 0 else: 3 | |
(modpg, res_0, restwins, resinvrs) = genPGparameters(pg) | |
proc sozpg(val: int | int64) = | |
## Compute the primes r1..sqrt(input_num) and store in global 'primes' array. | |
## Any algorithm (fast|small) is usable. Here the SoZ for P5 is used. | |
let md = 30 # P5's modulus value | |
let rscnt = 8 # P5's residue count | |
let res =[7,11,13,17,19,23,29,31] # P5's residues list | |
let posn = [0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,3,0,4,0,0,0,5,0,0,0,0,0,6,0,7] | |
let kmax = (val - 7) div md + 1 # number of resgroups to input value | |
var prms = newSeq[uint8](kmax) # byte array of prime candidates init '0' | |
let sqrtN =int(sqrt float64(val)) # integer sqrt of sqrt(input value) | |
var (modk, r, k) = (0, -1, 0) # init residue parameters | |
# mark the multiples of the primes r1..sqrtN in 'prms' | |
while true: | |
if (r.inc; r) == rscnt: (r = 0; modk += md; k.inc) | |
if (prms[k] and uint8(1 shl r)) != 0: continue # skip pc if not prime | |
let prm_r = res[r] # if prime save its residue value | |
let prime = modk + prm_r # numerate the prime value | |
if prime > sqrtN: break # we're finished when it's > sqrtN | |
for ri in res: # mark prime's multiples in prms | |
let prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair | |
let bit_r = uint8(1 shl posn[prod mod md]) # bit mask for prod's residue | |
var kpm = k * (prime + ri) + prod div md # 1st resgroup for prime mult | |
while kpm < kmax: prms[kpm] = prms[kpm] or bit_r; kpm += prime | |
# prms now contains the nonprime positions for the prime candidates r1..N | |
# extract primes into global var 'primes' | |
primes = @[] # create empty dynamic array for primes | |
for k in 0..kmax - 1: # for each resgroup | |
for r, res_r in res: # extract the primes in numerical order | |
if (prms[k] and uint8(1 shl r)) == 0: primes.add(md * k + res_r) | |
while primes[0] < res_0: primes.delete(0) | |
while primes[^1] > val: discard primes.pop() | |
#[ | |
proc printprms(Kn: int, Ki: uint64, indx: int, seg: seq[uint8]) = | |
## Print twinprimes for given twinpair for given segment slice. | |
## Primes will not be displayed in sorted order, collect|sort later for that. | |
var modk = Ki * modpg.uint # base value of 1st resgroup in slice | |
let r_hi = restwins[indx].uint # for upper twinpair residue value | |
for k in 0..(Kn - 1) shr 3: # for each byte of resgroups in slice | |
for r in 0..7: # extract the primes for each resgroup | |
if (seg[k] and uint8(1 shl r)) == 0 and (modk + r_hi) <= end_num: | |
echo(modk + r_hi - 1) # print twinprime mid val on a line | |
modk += modpg.uint # set base value for next resgroup | |
]# | |
proc nextp_init(hi_r: int): seq[uint64] = | |
## Initialize 'nextp' array for given twin pair residues for thread. | |
## Compute 1st prime multiple resgroups for each prime r1..sqrt(N) | |
## and store consecutively as lo_tp|hi_tp pairs for their restracks. | |
var nextp = newSeq[uint64](primes.len * 2) # 1st mults array for twin pair | |
let (r_hi, r_lo) = (hi_r, hi_r - 2) # upper|lower twin pair residues | |
var kmin = Kmin - 1 # resgroup index of start_num | |
if kmin * modpg.uint + r_lo.uint < start_num: kmin.inc # ensure r_lo in range | |
for j, prime in primes: # for each prime r1..sqrt(N) | |
let k = (prime - 2) div modpg # find the resgroup it's in | |
let r = (prime - 2) mod modpg + 2 # and its residue value | |
let r_inv = resinvrs[r - 2] # and its residue inverse | |
var ri = (r_lo * r_inv - 2) mod modpg + 2 # compute the ri for r for lo_tp | |
var ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) | |
if ki < kmin: # if 1st mult index < start_num's | |
ki = (kmin - ki) mod prime.uint # how many indices short is it | |
if ki > 0'u: ki = prime.uint - ki # adjust index value into range | |
else: ki = ki - kmin # else here, adjust index if it was > | |
nextp[2 * j] = ki # lo_tp index val >= start of range | |
ri = (r_hi * r_inv - 2) mod modpg + 2 # compute the ri for r for hi_tp | |
ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) | |
if ki < kmin: # if 1st mult index < start_num's | |
ki = (kmin - ki) mod prime.uint # how many indices short is it | |
if ki > 0'u: ki = prime.uint - ki # adjust index value into range | |
else: ki = ki - kmin # else here, adjust index if it was > | |
nextp[2 * j or 1] = ki # hi_tp index val >= start of range | |
result = nextp | |
proc twins_sieve(indx: int, r_hi: uint) {.gcsafe.} = | |
## Perform in a thread, the ssoz for a given twinpair, for Kmax resgroups. | |
## First create|init 'nextp' array of 1st prime mults for given twin pair, | |
## (stored consequtively in 'nextp') and init seg byte array for KB resgroups. | |
## For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime, | |
## for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
## Find last twin prime|sum for range, store in their arrays for this twinpair. | |
## Can optionally compile to print mid twinprime values generated by twinpair. | |
## Uses optimum segment sieve structure for 'small' and 'large' range values. | |
{.gcsafe.}: | |
var (sum, Ki, Kn) = (0'u, Kmin-1, KB)# init twins cnt|1st resgroup for slice | |
var (hi_tp, Kmax) = (0'u, Kmax) # max tp|resgroup, Kmax for slice | |
var seg = newSeq[uint8](((KB-1) shr s) + 1) # seg byte array for Kb resgroups | |
var nextp = nextp_init(r_hi.int) # 1st prime mults for twin pair restracks | |
if Ki * modpg.uint + (r_hi - 2) < start_num: Ki.inc # ensure lo tps in range | |
if (Kmax - 1) * modpg.uint + r_hi > end_num: Kmax.dec # and hi tps in range | |
while Ki < Kmax: # for Kn resgroup size slices upto Kmax | |
if KB > int(Kmax - Ki): Kn = int(Kmax - Ki) # set last slice resgroup size | |
for b in 0..(Kn - 1) shr s: seg[b] = 0 # set all seg byte bits to prime | |
for j, prime in primes: # for each prime index r1..sqrt(N) | |
# for lower twin pair residue track | |
var k = nextp[2 * j].int # starting from this resgroup in seg | |
while k < Kn: # mark primenth resgroup bits prime mults | |
if s > 0: seg[k shr 3] = seg[k shr 3] or (1 shl (k and 7)).uint8 | |
else: seg[k] = seg[k] or 1 | |
k += prime # set next prime multiple resgroup | |
nextp[2 * j] = uint(k - Kn) # save 1st resgroup in next eligible seg | |
# for upper twin pair residue track | |
k = nextp[2 * j or 1].int # starting from this resgroup in seg | |
while k < Kn: # mark primenth resgroup bits prime mults | |
if s > 0: seg[k shr 3] = seg[k shr 3] or (1 shl (k and 7)).uint8 | |
else: seg[k] = seg[k] or 1 | |
k += prime # set next prime multiple resgroup | |
nextp[2 * j or 1] = uint(k - Kn) # save 1st resgroup in next eligible seg | |
# need to set as nonprime unused bits in last | |
# byte of last seg; so fast, do for every seg | |
if s > 0: seg[(Kn-1) shr 3] = seg[(Kn-1) shr 3] or (not ((2 shl ((Kn-1) mod 8)) - 1)).uint8 | |
var cnt = 0 # init seg twin primes count then find seg sum | |
if s > 0: (for k in 0..(Kn - 1) shr 3: cnt += pbits[seg[k]]) | |
else: (for k in 0..Kn - 1: (if seg[k] == 0: cnt.inc)) | |
if cnt > 0: # if segment has twin primes | |
sum += cnt.uint # add the segment count to total count | |
var upk = Kn - 1 # from end of seg, count backwards to largest tp | |
if s > 0: (while (seg[upk shr 3] and (1 shl (upk and 7)).uint8) != 0: upk.dec) | |
else: (while seg[upk] != 0: upk.dec) | |
hi_tp = Ki + upk.uint # numerate its full resgroup value | |
#printprms(Kn, Ki, indx, seg) # optional: display twin primes in seg | |
Ki += KB.uint # set 1st resgroup val of next seg slice | |
# numerate largest twin prime in segs | |
hi_tp = if r_hi > end_num: 0'u else: hi_tp * modpg.uint + r_hi | |
lastwins[indx] = if sum == 0: 1'u else: hi_tp # store final seg tp value | |
cnts[indx] = sum # sum for twin pair | |
proc twinprimes_ssoz() = | |
## Main routine to setup, time, and display results for twin primes sieve. | |
let x = stdin.readline.split | |
end_num = max(x[0].parseUInt, 3'u) | |
start_num = if x.len > 1: max(x[1].parseUInt, 3'u) else: 3'u | |
if start_num > end_num: swap start_num, end_num | |
echo("threads = ", countProcessors()) | |
let ts = epochTime() # start timing sieve setup execution | |
start_num = start_num or 1 # if start_num even add 1 | |
end_num = (end_num - 1) or 1 # if end_num even subtract 1 | |
selectPG(start_num, end_num) # select PG and seg factor Bn for input range | |
let pairscnt = restwins.len # number of twin pairs for selected PG | |
cnts = newSeq[uint](pairscnt) # array to hold count of tps for each thread | |
lastwins = newSeq[uint](pairscnt)# array to hold largest tp for each thread | |
Kmin = (start_num-2) div modpg.uint + 1 # number of resgroups to start_num | |
Kmax = (end_num-2) div modpg.uint + 1 # number of resgroups to end_num | |
let range = Kmax - Kmin + 1 # number of range resgroups, at least 1 | |
var n = if range < 37_500_000_000_000'u: 4 elif range < 975_000_000_000_000'u: 6 else: 8 | |
if s == 0: n = 1 | |
let B = Bn * 1024 * n # set seg size to optimize for selected PG | |
KB = if range.int < B: range.int else: B # segments resgroups size | |
echo("each thread segment is [", 1, " x ", ((KB-1) shr s) + 1, "] bytes array") | |
# This is not necessary for running the program but provides information | |
# to determine the 'efficiency' of the used PG: (num of primes)/(num of pcs) | |
let maxpairs = range * pairscnt.uint # maximum number of twinprime pcs | |
echo("twinprime candidates = ", maxpairs, "; resgroups = ", range) | |
# End of non-essential code. | |
if end_num < 49'u: primes.add(5) # generate sieving primes | |
else: sozpg(int(sqrt float64(end_num))) # <= sqrt(end_num) | |
echo("each ", pairscnt, " threads has nextp[", 2, " x ", primes.len, "] array") | |
var twinscnt = 0'u64 # number of twin primes in range | |
let lo_range = uint(restwins[0] - 3) # lo_range = lo_tp - 1 | |
for tp in [3, 5, 11, 17]: # excluded low tp values for PGs used | |
if end_num == 3'u: break # if 3 end of range, no twin primes | |
if tp.uint in start_num..lo_range: twinscnt += 1 # cnt small tps if any | |
let te = epochTime() - ts # sieve setup time | |
echo("setup time = ", te.formatFloat(ffDecimal, 6), " secs") | |
echo("perform twinprimes ssoz sieve") | |
let t1 = epochTime() # start timing ssoz sieve execution | |
parallel: # perform in parallel | |
for indx, r_hi in restwins: # for each twin pair row index | |
spawn twins_sieve(indx, r_hi.uint) # sieve selected twin pair restracks | |
stdout.write("\r", (indx + 1), " of ", pairscnt, " threads done") | |
sync() # when all the threads finish | |
var last_twin = 0'u # find largest twin prime in range | |
for i in 0..pairscnt - 1: # and determine total twin primes count | |
twinscnt += cnts[i] | |
if last_twin < lastwins[i]: last_twin = lastwins[i] | |
if end_num == 5'u and twinscnt == 1: last_twin = 5'u | |
var Kn = range.int mod KB # set number of resgroups in last slice | |
if Kn == 0: Kn = KB # if multiple of seg size set to seg size | |
let t2 = epochTime() - t1 # sieve execution time | |
echo("\nsieve time = ", t2.formatFloat(ffDecimal, 3), " secs") | |
echo("last segment = ", Kn, " resgroups; segment slices = ", (range - 1) div KB.uint + 1) | |
echo("total twins = ", twinscnt, "; last twin = ", last_twin - 1, "+/-1") | |
echo("total time = ", (t2 + te).formatFloat(ffDecimal, 3), " secs\n") | |
twinprimes_ssoz() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment