Last active
August 12, 2024 17:06
-
-
Save jzakiya/6c7e1868bd749a6b1add62e3e3b2341e to your computer and use it in GitHub Desktop.
Twinprimes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Nim
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[ | |
This Nim source file is a multiple threaded implementation to perform an | |
extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N. | |
Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. | |
Output is the number of twin primes <= N, or in range N1 to N2; the last | |
twin prime value for the range; and the total time of execution. | |
Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu, | |
2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning | |
would be needed to optimize for other hardware systems (ARM, PowerPC, etc). | |
For Nim >= 2.0.0 compile as: | |
$ nim c --cc:gcc --mm:arc --d:danger --d:release --threads:on --d:useMalloc twinprimes_ssozgist_new.nim | |
Then run executable: $ ./twinprimes_ssozgist1 <cr>, and enter range values. | |
Or alternatively: $ echo <range1> <range2> | ./twinprimes_ssoz | |
Range values can be provided in either order (larger or smaller first). | |
This source file, and updates, will be available here: | |
https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e | |
Mathematical and technical basis for implementation are explained here: | |
https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_ | |
Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ | |
for_the_Riemann_Hypotheses | |
https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ | |
https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained | |
This code is provided free and subject to copyright and terms of the | |
GNU General Public License Version 3, GPLv3, or greater. | |
License copy/terms are here: http://www.gnu.org/licenses/ | |
Copyright (c) 2017-2024 Jabari Zakiya -- jzakiya at gmail dot com | |
Version Date: 2024/08/12 | |
]# | |
import math # for sqrt, gcd, mod functions | |
import strutils, typetraits # for number input | |
import times, os # for timing code execution | |
import osproc # for getting threads count | |
import threadpool # for parallel processing | |
import algorithm # for sort | |
import bitops # for countSetBits | |
{.experimental: "parallel".} # required to use 'parallel' (>= 1.4.x) | |
proc modinv(a0, b0: int): int = | |
## Compute modular inverse a^-1 of a to base b, e.g. a*(a^-1) mod b = 1. | |
var (a, b, x0) = (a0, b0, 0) | |
result = 1 | |
if b == 1: return | |
while a > 1: | |
result -= (a div b) * x0 | |
a = a mod b | |
swap a, b | |
swap x0, result | |
if result < 0: result += b0 | |
proc genPGparameters(prime: int): (int, int, int, seq[int], seq[int]) = | |
## Create prime generator parameters for given Pn | |
echo("using Prime Generator parameters for P", prime) | |
let primes = [2, 3, 5, 7, 11, 13, 17, 19, 23] | |
var (modpg, res_0) = (1, 0) # compute Pn's modulus and res_0 value | |
for prm in primes: (res_0 = prm; if prm > prime: break; modpg *= prm) | |
var restwins: seq[int] = @[] # save upper twin pair residues here | |
var inverses = newSeq[int](modpg+2) # save Pn's residues inverses here | |
var (rc, inc, res) = (5, 2, 0) # use P3's PGS to generate residue candidates | |
while rc < (modpg shr 1): # find PG's 1st half residues | |
if gcd(modpg, rc) == 1: # if rc is a residue | |
let mc = modpg - rc # create its modular complement | |
inverses[rc] = modinv(rc,modpg) # save rc amd mc inverses | |
inverses[mc] = modinv(mc,modpg) # if in twinpair save both hi residues | |
if res + 2 == rc: restwins.add(rc); restwins.add(mc + 2) | |
res = rc # save current found residue | |
rc += inc; inc = inc xor 0b110 # create next P3 sequence rc: 5 7 11 13 17 19 ... | |
restwins.sort(system.cmp[int]); restwins.add(modpg + 1) # last residue is last hi_tp | |
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 # last 2 resdiues are self inverses | |
(modpg, res_0, restwins.len, restwins, inverses) | |
# Global parameters | |
var | |
cnts: seq[uint] # holds twinprimes count for each twinpair | |
lastwins: seq[uint] # holds largest twinprime for each twinpair | |
proc setSieveParameters(start_num, end_num: uint): (uint, int, uint, uint, uint, uint, int, seq[int], seq[int]) = | |
## Select at runtime best PG and segment size factor to use for input value. | |
## These are good estimates derived from PG data profiling. Can be improved. | |
let range = end_num - start_num | |
var (Bn, pg) = (0, 3) | |
if end_num < 49'u: | |
Bn = 1; pg = 3 | |
elif range < 77_000_000: | |
Bn = 16; pg = 5 | |
elif range < 1_100_000_000'u: | |
Bn = 32; pg = 7 | |
elif range < 35_500_000_000'u: | |
Bn = 64; pg = 11 | |
elif range < 14_000_000_000_000'u: | |
pg = 13 | |
if range > 7_000_000_000_000'u: Bn = 384 | |
elif range > 2_500_000_000_000'u: Bn = 320 | |
elif range > 250_000_000_000'u: Bn = 196 | |
else: Bn = 128 | |
else: | |
Bn = 384; pg = 17 | |
let (modpg, res_0, pairscnt, restwins, resinvrs) = genPGparameters(pg) | |
let Kmin = (start_num - 2) div modpg.uint + 1 # number of resgroups to start_num | |
let Kmax = (end_num - 2) div modpg.uint + 1 # number of resgroups to end_num | |
let krange = Kmax - Kmin + 1 # number of range resgroups, at least 1 | |
let n = if krange < 37_500_000_000_000'u: 10 elif krange < 975_000_000_000_000'u: 16 else: 20 | |
let B = uint(Bn * 1024 * n) # set seg size to optimize for selected PG | |
let Ks = if krange < B: krange else: B # segments resgroups size | |
echo("segment size = ",Ks," resgroups; seg array is [1 x ",((Ks-1) shr 6) + 1,"] 64-bits") | |
let maxpairs = krange.int * pairscnt; # maximum number of twinprime pcs | |
echo("twinprime candidates = ", maxpairs, "; resgroups = ", krange); | |
(modpg.uint, res_0, Ks, Kmin, Kmax, krange, pairscnt, restwins, resinvrs) | |
proc sozp5(val: int | int64, res_0: int, start_num, end_num: uint): seq[int] = | |
## Return the primes r0..sqrt(end_num) within range (start_num...end_num) | |
let (md, rescnt) = (30, 8) # P5's modulus and residues count | |
let res = [7,11,13,17,19,23,29,31] # P5's residues list | |
let bitn = [0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128] | |
let range_size = end_num - start_num # integers size for inputs range | |
let kmax = (val - 2) div md + 1 # number of resgroups to input value | |
var prms = newSeq[uint8](kmax) # byte array of prime candidates init '0' | |
let sqrtn = int(sqrt float64(val)) # compute integer sqrt of val | |
var k = sqrtn div md # compute its resgroup value | |
var (resk, r) = (sqrtn - md*k, 0) # compute its residue value; set residue start posn | |
while resk >= res[r]: r += 1 # find largest residue <= sqrtn posn in its resgroup | |
let pcs_to_sqrtn = k*rescnt + r # number of pcs <= sqrtn | |
for i in 0..pcs_to_sqrtn: # for r0..sqrtN primes mark their multiples | |
let (k, r) = (i div rescnt, i mod rescnt) # update resgroup parameters | |
if (prms[k] and uint8(1 shl r)) != 0: continue # skip pc if not prime | |
let prm_r = res[r] # if prime save its residue value | |
let prime = md*k + prm_r # numerate its value | |
let rem = start_num mod uint(prime) # prime's modular distance to start_num | |
if not(uint(prime) - rem <= range_size or rem == 0): continue # skip prime if no multiple in range | |
for ri in res: # mark prime's multiples in prms | |
let prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair | |
let bit_r = uint8(bitn[prod mod md]) # bit mask value for prod's residue | |
var kpm = k * (prime + ri) + prod div md # resgroup for prime's 1st multiple | |
while kpm < kmax: prms[kpm] = prms[kpm] or bit_r; kpm += prime | |
# prms now contains the prime multiples positions marked for the pcs r0..N | |
# now along each restrack, identify|numerate the primes in each resgroup | |
# return only the primes with a multiple within range (start_num...end_num) | |
result = @[] # create empty dynamic array for primes | |
for r, res_r in res: # along each restrack|row til end | |
for k, resgroup in prms: # for each resgroup along restrack | |
if (resgroup and uint8(1 shl r)) == 0: # if bit location a prime | |
let prime = md * k + res_r # numerate its value, store if in range | |
# check if prime has multiple in range, if so keep it, if not don't | |
let rem = start_num mod prime.uint | |
if (prime in res_0..val) and (prime.uint - rem <= range_size or rem == 0): result.add(prime) | |
proc nextp_init(hi_r: int, kmin: uint, modpg: int, primes: seq[int], resinvrs: seq[int]): seq[uint64] = | |
## Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'. | |
## Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and | |
## store consecutively as lo_tp|hi_tp pairs for their restracks. | |
var nextp = newSeq[uint64](primes.len * 2) # 1st mults array for twinpair | |
let (r_hi, r_lo) = (hi_r, hi_r - 2) # upper|lower twinpair residues vals | |
for j, prime in primes: # for each prime r0..sqrt(N) | |
let k = (prime - 2) div modpg # find the resgroup it's in | |
let r = (prime - 2) mod modpg + 2 # and its residue value | |
let r_inv = resinvrs[r] # and its residue inverse | |
var ri = (r_lo * r_inv - 2) mod modpg + 2 # compute r's ri for r_lo | |
var ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) # and 1st mult | |
if ki < kmin: # if 1st mult index < start_num's | |
ki = (kmin - ki) mod prime.uint # how many indices short is it | |
if ki > 0'u: ki = prime.uint - ki # adjust index value into range | |
else: ki = ki - kmin # else here, adjust index if it was > | |
nextp[j * 2] = ki # lo_tp index val >= start of range | |
ri = (r_hi * r_inv - 2) mod modpg + 2 # compute r's ri for r_hi | |
ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) # and 1st mult | |
if ki < kmin: # if 1st mult index < start_num's | |
ki = (kmin - ki) mod prime.uint # how many indices short is it | |
if ki > 0'u: ki = prime.uint - ki # adjust index value into range | |
else: ki = ki - kmin # else here, adjust index if it was > | |
nextp[j * 2 or 1] = ki # hi_tp index val >= start of range | |
result = nextp | |
proc twins_sieve(r_hi, kmin, kmax, Ks, start_num, end_num, modpg: uint, primes: seq[int], resinvrs: seq[int], indx: int) = | |
## Perform in thread the ssoz for given twinpair residues for Kmax resgroups. | |
## First create|init 'nextp' array of 1st prime mults for given twinpair, | |
## stored consequtively in 'nextp', and init seg array for Ks resgroups. | |
## For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime | |
## for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
## Find last twin prime|sum for range, store at array[indx] for this twinpair. | |
## Can optionally compile to print mid twinprime values generated by twinpair. | |
{.gcsafe.}: | |
const S = 6 # shift value for 64 bits | |
const BMASK = (1 shl S) - 1 # bitmask val for 64 bits | |
var (sum, Ki, Kn) = (0'u, kmin-1, Ks.int) # init these parameters | |
var (hi_tp, Kmax) = (0'u, kmax) # max twinprime|resgroup val | |
var seg = newSeq[uint](((Ks-1) shr S) + 1) # seg array for Ks resgroups | |
if r_hi - 2 < (start_num - 2) mod modpg + 2: Ki.inc # ensure lo tps in range | |
if r_hi > (end_num - 2) mod modpg + 2: Kmax.dec # ensure hi tps in range | |
var nextp = nextp_init(r_hi.int, Ki, modpg.int, primes, resinvrs) # init nextp array | |
while Ki < Kmax: # for Ks resgroup size slices upto Kmax | |
if Ks > (Kmax - Ki): Kn = int(Kmax - Ki) # adjust Kn size for last seg | |
for j, prime in primes: # for each prime r0..sqrt(N) | |
# for lower twinpair residue track | |
var k1 = nextp[j * 2].int # starting from this resgroup in seg | |
while k1 < Kn: # mark primenth resgroup bits prime mults | |
seg[k1 shr S] = seg[k1 shr S] or (1'u shl (k1 and BMASK)).uint | |
k1 += prime # set next prime multiple resgroup | |
nextp[j * 2] = uint(k1 - Kn) # save 1st resgroup in next eligible seg | |
# for upper twinpair residue track | |
var k2 = nextp[j * 2 or 1].int # starting from this resgroup in seg | |
while k2 < Kn: # mark primenth resgroup bits prime mults | |
seg[k2 shr S] = seg[k2 shr S] or (1'u shl (k2 and BMASK)).uint | |
k2 += prime # set next prime multiple resgroup | |
nextp[j * 2 or 1] = uint(k2 - Kn) # save 1st resgroup in next eligible seg | |
# set as nonprime unused bits in last seg[n] | |
# so fast, do for every seg[i] | |
seg[(Kn-1) shr S] = seg[(Kn-1) shr S] or (not 1'u shl ((Kn-1) and BMASK)).uint | |
var cnt = 0 # count the twinprimes in the segment | |
for m in seg[0..(Kn-1) shr S]: cnt += (1 shl S) - countSetBits(m) | |
if cnt > 0: # if segment has twin primes | |
sum += cnt.uint # add segment count to total range count | |
var upk = Kn - 1 # from end of seg count back to largest tp | |
while (seg[upk shr S] and (1'u shl (upk and BMASK)).uint) != 0: upk.dec | |
hi_tp = Ki + upk.uint # numerate its full resgroup value | |
Ki += Ks # set 1st resgroup val of next seg slice | |
if Ki < Kmax: (for b in 0..(Kn - 1) shr S: seg[b] = 0) # set seg to all primes | |
# when sieve done for full range | |
# numerate largest twinprime in segs | |
hi_tp = if r_hi > end_num: 0'u else: hi_tp * modpg.uint + r_hi | |
lastwins[indx] = if sum == 0: 1'u else: hi_tp # store final seg tp value | |
cnts[indx] = sum # sum for twin pair | |
proc twinprimes_ssoz() = | |
## Main routine to setup, time, and display results for twin primes sieve. | |
let x = stdin.readline.split # Inputs are 1 or 2 range values < 2**64 | |
var end_num = max(x[0].parseUInt, 3'u) | |
var start_num = if x.len > 1: max(x[1].parseUInt, 3'u) else: 3'u | |
if start_num > end_num: swap start_num, end_num | |
start_num = start_num or 1 # if start_num even add 1 | |
end_num = (end_num - 1) or 1 # if end_num even subtract 1 | |
if end_num - start_num < 2: (start_num, end_num) = (7'u, 7'u) | |
echo("threads = ", countProcessors()) | |
let ts = epochTime() # start timing sieve setup execution | |
# select Pn, set sieving params for inputs | |
let (modpg, res_0, Ks, Kmin, Kmax, Krange, | |
pairscnt, restwins, resinvrs) = setSieveParameters(start_num, end_num) | |
# create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range | |
let primes: seq[int] = if end_num < 49: @[5] | |
else: sozp5(int(sqrt float64(end_num)), res_0, start_num, end_num) | |
echo("each of ", pairscnt, " threads has nextp[", 2, " x ", primes.len, "] array") | |
var twinscnt = 0'u64 # init twinprimes range count | |
let lo_range = uint(restwins[0] - 3) # lo_range = lo_tp - 1 | |
for tp in [3, 5, 11, 17]: # excluded low tp values for PGs used | |
if end_num == 3'u: break # if 3 end of range, no twin primes | |
if tp.uint in start_num..lo_range: twinscnt += 1 # cnt small tps if any | |
let te = epochTime() - ts # sieve setup time | |
echo("setup time = ", te.formatFloat(ffDecimal, 6), " secs") | |
echo("perform twinprimes ssoz sieve") | |
let t1 = epochTime() # start timing ssoz sieve execution | |
cnts = newSeq[uint](pairscnt) # array to hold count of tps for each thread | |
lastwins = newSeq[uint](pairscnt) # array to hold largest tp for each thread | |
#parallel: # perform in parallel | |
for indx, r_hi in restwins: # for each twin pair row index | |
spawn twins_sieve(r_hi.uint, Kmin, Kmax, Ks, start_num, end_num, modpg, primes, resinvrs, indx) | |
stdout.write("\r", (indx + 1), " of ", pairscnt, " twinpairs done") | |
sync() # when all the threads finish | |
var last_twin = 0'u # find largest twin prime in range | |
for i in 0 ..< pairscnt: # and determine total twin primes count | |
twinscnt += cnts[i] | |
if last_twin < lastwins[i]: last_twin = lastwins[i] | |
if end_num == 5'u and twinscnt == 1: last_twin = 5'u | |
var Kn = Krange mod Ks # set number of resgroups in last slice | |
if Kn == 0: Kn = Ks # if multiple of seg size set to seg size | |
let t2 = epochTime() - t1 # sieve execution time | |
echo("\nsieve time = ", t2.formatFloat(ffDecimal, 3), " secs") | |
echo("total time = ", (t2 + te).formatFloat(ffDecimal, 3), " secs") | |
echo("last segment = ", Kn, " resgroups; segment slices = ", (Krange - 1) div Ks + 1) | |
echo("total twins = ", twinscnt, "; last twin = ", last_twin - 1, "+/-1") | |
twinprimes_ssoz() |
Pascal66
commented
Dec 17, 2019
via email
My fault. I’ve give 500000000000 60000000000 in args from command line
Instead answering when i’m not asked to do it
I’m working with biginteger (no limit, or memory limit) since the begining.
From: Jabari Zakiya
Sent: Monday, December 16, 2019 6:58 PM
To: jzakiya
Cc: Priveyes ; Comment
Subject: Re: jzakiya/twinprimes_ssoz.nim
It works for me! 😀
Your system has to have enough memory to store everything though.
➜ nim inxi -SCMI
System: Host: localhost.localdomain Kernel: 5.4.3-pclos1 x86_64 bits: 64
Desktop: KDE Plasma 5.17.4 Distro: PCLinuxOS 2020
Machine: Type: Laptop System: System76 product: Gazelle v: gaze10 serial:
<root required>
Mobo: System76 model: Gazelle v: gaze10 serial: <root required> UEFI
[Legacy]: American Megatrends v: 1.05.08
date: 03/31/2016
CPU: Topology: Quad Core model: Intel Core i7-6700HQ bits: 64 type:
MT MCP L2 cache: 6144 KiB
Speed: 800 MHz min/max: 800/3500 MHz Core speeds (MHz): 1: 800 2:
800 3: 800 4: 801 5: 800 6: 800 7: 800 8: 800
Info: Processes: 310 Uptime: 3h 13m Memory: 15.53 GiB used: 6.51 GiB
(41.9%) Shell: zsh inxi: 3.0.37
--------------------------------------------------------------------------------------------
➜ nim echo 500_000_000_000 600_000_000_000 | ./twinprimes_ssozdual1a
threads = 8
each thread segment is [1 x 131072] bytes array
twinprime candidates = 4945055940; resgroups = 3330004
each 1485 threads has nextp[2 x 62068] array
setup time = 0.011448 secs
perform twinprimes ssoz sieve
1485 of 1485 threads done
sieve time = 7.462 secs
last segment = 53204 resgroups; segment slices = 26
total twins = 180694619; last twin = 599999999772+/-1
total time = 7.474 secs
-------------------------------------------------------------
Jabari
On Mon, Dec 16, 2019 at 11:18 AM Priveyes ***@***.***> wrote:
This doesnt work :
nim-1.0.4>twinprimes_ssoz 500000000000 600000000000
strutils.nim(1118) parseUInt
Error: unhandled exception: invalid unsigned integer: [ValueError]
—
You are receiving this because you authored the thread.
Reply to this email directly, view it on GitHub
<https://gist.github.com/6c7e1868bd749a6b1add62e3e3b2341e?email_source=notifications&email_token=AAARBYGHXAJFTVNG44V6O73QY6S43A5CNFSM4JRVSTUKYY3PNVWWK3TUL52HS4DFVNDWS43UINXW23LFNZ2KUY3PNVWWK3TUL5UWJTQAF6ACO#gistcomment-3112999>,
or unsubscribe
<https://github.com/notifications/unsubscribe-auth/AAARBYDNTQQBQGMQTSMUISTQY6S43ANCNFSM4JRVSTUA>
.
—
You are receiving this because you commented.
Reply to this email directly, view it on GitHub, or unsubscribe.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment