Skip to content

Instantly share code, notes, and snippets.

@jzakiya
Last active August 12, 2024 17:06
Show Gist options
  • Save jzakiya/6c7e1868bd749a6b1add62e3e3b2341e to your computer and use it in GitHub Desktop.
Save jzakiya/6c7e1868bd749a6b1add62e3e3b2341e to your computer and use it in GitHub Desktop.
Twinprimes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Nim
#[
This Nim source file is a multiple threaded implementation to perform an
extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N.
Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1.
Output is the number of twin primes <= N, or in range N1 to N2; the last
twin prime value for the range; and the total time of execution.
Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu,
2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning
would be needed to optimize for other hardware systems (ARM, PowerPC, etc).
For Nim >= 2.0.0 compile as:
$ nim c --cc:gcc --mm:arc --d:danger --d:release --threads:on --d:useMalloc twinprimes_ssozgist_new.nim
Then run executable: $ ./twinprimes_ssozgist1 <cr>, and enter range values.
Or alternatively: $ echo <range1> <range2> | ./twinprimes_ssoz
Range values can be provided in either order (larger or smaller first).
This source file, and updates, will be available here:
https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e
Mathematical and technical basis for implementation are explained here:
https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_
Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_
for_the_Riemann_Hypotheses
https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_
https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK
https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained
This code is provided free and subject to copyright and terms of the
GNU General Public License Version 3, GPLv3, or greater.
License copy/terms are here: http://www.gnu.org/licenses/
Copyright (c) 2017-2024 Jabari Zakiya -- jzakiya at gmail dot com
Version Date: 2024/08/12
]#
import math # for sqrt, gcd, mod functions
import strutils, typetraits # for number input
import times, os # for timing code execution
import osproc # for getting threads count
import threadpool # for parallel processing
import algorithm # for sort
import bitops # for countSetBits
{.experimental: "parallel".} # required to use 'parallel' (>= 1.4.x)
proc modinv(a0, b0: int): int =
## Compute modular inverse a^-1 of a to base b, e.g. a*(a^-1) mod b = 1.
var (a, b, x0) = (a0, b0, 0)
result = 1
if b == 1: return
while a > 1:
result -= (a div b) * x0
a = a mod b
swap a, b
swap x0, result
if result < 0: result += b0
proc genPGparameters(prime: int): (int, int, int, seq[int], seq[int]) =
## Create prime generator parameters for given Pn
echo("using Prime Generator parameters for P", prime)
let primes = [2, 3, 5, 7, 11, 13, 17, 19, 23]
var (modpg, res_0) = (1, 0) # compute Pn's modulus and res_0 value
for prm in primes: (res_0 = prm; if prm > prime: break; modpg *= prm)
var restwins: seq[int] = @[] # save upper twin pair residues here
var inverses = newSeq[int](modpg+2) # save Pn's residues inverses here
var (rc, inc, res) = (5, 2, 0) # use P3's PGS to generate residue candidates
while rc < (modpg shr 1): # find PG's 1st half residues
if gcd(modpg, rc) == 1: # if rc is a residue
let mc = modpg - rc # create its modular complement
inverses[rc] = modinv(rc,modpg) # save rc amd mc inverses
inverses[mc] = modinv(mc,modpg) # if in twinpair save both hi residues
if res + 2 == rc: restwins.add(rc); restwins.add(mc + 2)
res = rc # save current found residue
rc += inc; inc = inc xor 0b110 # create next P3 sequence rc: 5 7 11 13 17 19 ...
restwins.sort(system.cmp[int]); restwins.add(modpg + 1) # last residue is last hi_tp
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 # last 2 resdiues are self inverses
(modpg, res_0, restwins.len, restwins, inverses)
# Global parameters
var
cnts: seq[uint] # holds twinprimes count for each twinpair
lastwins: seq[uint] # holds largest twinprime for each twinpair
proc setSieveParameters(start_num, end_num: uint): (uint, int, uint, uint, uint, uint, int, seq[int], seq[int]) =
## Select at runtime best PG and segment size factor to use for input value.
## These are good estimates derived from PG data profiling. Can be improved.
let range = end_num - start_num
var (Bn, pg) = (0, 3)
if end_num < 49'u:
Bn = 1; pg = 3
elif range < 77_000_000:
Bn = 16; pg = 5
elif range < 1_100_000_000'u:
Bn = 32; pg = 7
elif range < 35_500_000_000'u:
Bn = 64; pg = 11
elif range < 14_000_000_000_000'u:
pg = 13
if range > 7_000_000_000_000'u: Bn = 384
elif range > 2_500_000_000_000'u: Bn = 320
elif range > 250_000_000_000'u: Bn = 196
else: Bn = 128
else:
Bn = 384; pg = 17
let (modpg, res_0, pairscnt, restwins, resinvrs) = genPGparameters(pg)
let Kmin = (start_num - 2) div modpg.uint + 1 # number of resgroups to start_num
let Kmax = (end_num - 2) div modpg.uint + 1 # number of resgroups to end_num
let krange = Kmax - Kmin + 1 # number of range resgroups, at least 1
let n = if krange < 37_500_000_000_000'u: 10 elif krange < 975_000_000_000_000'u: 16 else: 20
let B = uint(Bn * 1024 * n) # set seg size to optimize for selected PG
let Ks = if krange < B: krange else: B # segments resgroups size
echo("segment size = ",Ks," resgroups; seg array is [1 x ",((Ks-1) shr 6) + 1,"] 64-bits")
let maxpairs = krange.int * pairscnt; # maximum number of twinprime pcs
echo("twinprime candidates = ", maxpairs, "; resgroups = ", krange);
(modpg.uint, res_0, Ks, Kmin, Kmax, krange, pairscnt, restwins, resinvrs)
proc sozp5(val: int | int64, res_0: int, start_num, end_num: uint): seq[int] =
## Return the primes r0..sqrt(end_num) within range (start_num...end_num)
let (md, rescnt) = (30, 8) # P5's modulus and residues count
let res = [7,11,13,17,19,23,29,31] # P5's residues list
let bitn = [0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128]
let range_size = end_num - start_num # integers size for inputs range
let kmax = (val - 2) div md + 1 # number of resgroups to input value
var prms = newSeq[uint8](kmax) # byte array of prime candidates init '0'
let sqrtn = int(sqrt float64(val)) # compute integer sqrt of val
var k = sqrtn div md # compute its resgroup value
var (resk, r) = (sqrtn - md*k, 0) # compute its residue value; set residue start posn
while resk >= res[r]: r += 1 # find largest residue <= sqrtn posn in its resgroup
let pcs_to_sqrtn = k*rescnt + r # number of pcs <= sqrtn
for i in 0..pcs_to_sqrtn: # for r0..sqrtN primes mark their multiples
let (k, r) = (i div rescnt, i mod rescnt) # update resgroup parameters
if (prms[k] and uint8(1 shl r)) != 0: continue # skip pc if not prime
let prm_r = res[r] # if prime save its residue value
let prime = md*k + prm_r # numerate its value
let rem = start_num mod uint(prime) # prime's modular distance to start_num
if not(uint(prime) - rem <= range_size or rem == 0): continue # skip prime if no multiple in range
for ri in res: # mark prime's multiples in prms
let prod = prm_r * ri - 2 # compute cross-product for prm_r|ri pair
let bit_r = uint8(bitn[prod mod md]) # bit mask value for prod's residue
var kpm = k * (prime + ri) + prod div md # resgroup for prime's 1st multiple
while kpm < kmax: prms[kpm] = prms[kpm] or bit_r; kpm += prime
# prms now contains the prime multiples positions marked for the pcs r0..N
# now along each restrack, identify|numerate the primes in each resgroup
# return only the primes with a multiple within range (start_num...end_num)
result = @[] # create empty dynamic array for primes
for r, res_r in res: # along each restrack|row til end
for k, resgroup in prms: # for each resgroup along restrack
if (resgroup and uint8(1 shl r)) == 0: # if bit location a prime
let prime = md * k + res_r # numerate its value, store if in range
# check if prime has multiple in range, if so keep it, if not don't
let rem = start_num mod prime.uint
if (prime in res_0..val) and (prime.uint - rem <= range_size or rem == 0): result.add(prime)
proc nextp_init(hi_r: int, kmin: uint, modpg: int, primes: seq[int], resinvrs: seq[int]): seq[uint64] =
## Initialize 'nextp' array for twinpair upper residue rhi in 'restwins'.
## Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and
## store consecutively as lo_tp|hi_tp pairs for their restracks.
var nextp = newSeq[uint64](primes.len * 2) # 1st mults array for twinpair
let (r_hi, r_lo) = (hi_r, hi_r - 2) # upper|lower twinpair residues vals
for j, prime in primes: # for each prime r0..sqrt(N)
let k = (prime - 2) div modpg # find the resgroup it's in
let r = (prime - 2) mod modpg + 2 # and its residue value
let r_inv = resinvrs[r] # and its residue inverse
var ri = (r_lo * r_inv - 2) mod modpg + 2 # compute r's ri for r_lo
var ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) # and 1st mult
if ki < kmin: # if 1st mult index < start_num's
ki = (kmin - ki) mod prime.uint # how many indices short is it
if ki > 0'u: ki = prime.uint - ki # adjust index value into range
else: ki = ki - kmin # else here, adjust index if it was >
nextp[j * 2] = ki # lo_tp index val >= start of range
ri = (r_hi * r_inv - 2) mod modpg + 2 # compute r's ri for r_hi
ki = uint(k * (prime + ri) + (r * ri - 2) div modpg) # and 1st mult
if ki < kmin: # if 1st mult index < start_num's
ki = (kmin - ki) mod prime.uint # how many indices short is it
if ki > 0'u: ki = prime.uint - ki # adjust index value into range
else: ki = ki - kmin # else here, adjust index if it was >
nextp[j * 2 or 1] = ki # hi_tp index val >= start of range
result = nextp
proc twins_sieve(r_hi, kmin, kmax, Ks, start_num, end_num, modpg: uint, primes: seq[int], resinvrs: seq[int], indx: int) =
## Perform in thread the ssoz for given twinpair residues for Kmax resgroups.
## First create|init 'nextp' array of 1st prime mults for given twinpair,
## stored consequtively in 'nextp', and init seg array for Ks resgroups.
## For sieve, mark resgroup bits to '1' if either twinpair restrack is nonprime
## for primes mults resgroups, and update 'nextp' restrack slices acccordingly.
## Find last twin prime|sum for range, store at array[indx] for this twinpair.
## Can optionally compile to print mid twinprime values generated by twinpair.
{.gcsafe.}:
const S = 6 # shift value for 64 bits
const BMASK = (1 shl S) - 1 # bitmask val for 64 bits
var (sum, Ki, Kn) = (0'u, kmin-1, Ks.int) # init these parameters
var (hi_tp, Kmax) = (0'u, kmax) # max twinprime|resgroup val
var seg = newSeq[uint](((Ks-1) shr S) + 1) # seg array for Ks resgroups
if r_hi - 2 < (start_num - 2) mod modpg + 2: Ki.inc # ensure lo tps in range
if r_hi > (end_num - 2) mod modpg + 2: Kmax.dec # ensure hi tps in range
var nextp = nextp_init(r_hi.int, Ki, modpg.int, primes, resinvrs) # init nextp array
while Ki < Kmax: # for Ks resgroup size slices upto Kmax
if Ks > (Kmax - Ki): Kn = int(Kmax - Ki) # adjust Kn size for last seg
for j, prime in primes: # for each prime r0..sqrt(N)
# for lower twinpair residue track
var k1 = nextp[j * 2].int # starting from this resgroup in seg
while k1 < Kn: # mark primenth resgroup bits prime mults
seg[k1 shr S] = seg[k1 shr S] or (1'u shl (k1 and BMASK)).uint
k1 += prime # set next prime multiple resgroup
nextp[j * 2] = uint(k1 - Kn) # save 1st resgroup in next eligible seg
# for upper twinpair residue track
var k2 = nextp[j * 2 or 1].int # starting from this resgroup in seg
while k2 < Kn: # mark primenth resgroup bits prime mults
seg[k2 shr S] = seg[k2 shr S] or (1'u shl (k2 and BMASK)).uint
k2 += prime # set next prime multiple resgroup
nextp[j * 2 or 1] = uint(k2 - Kn) # save 1st resgroup in next eligible seg
# set as nonprime unused bits in last seg[n]
# so fast, do for every seg[i]
seg[(Kn-1) shr S] = seg[(Kn-1) shr S] or (not 1'u shl ((Kn-1) and BMASK)).uint
var cnt = 0 # count the twinprimes in the segment
for m in seg[0..(Kn-1) shr S]: cnt += (1 shl S) - countSetBits(m)
if cnt > 0: # if segment has twin primes
sum += cnt.uint # add segment count to total range count
var upk = Kn - 1 # from end of seg count back to largest tp
while (seg[upk shr S] and (1'u shl (upk and BMASK)).uint) != 0: upk.dec
hi_tp = Ki + upk.uint # numerate its full resgroup value
Ki += Ks # set 1st resgroup val of next seg slice
if Ki < Kmax: (for b in 0..(Kn - 1) shr S: seg[b] = 0) # set seg to all primes
# when sieve done for full range
# numerate largest twinprime in segs
hi_tp = if r_hi > end_num: 0'u else: hi_tp * modpg.uint + r_hi
lastwins[indx] = if sum == 0: 1'u else: hi_tp # store final seg tp value
cnts[indx] = sum # sum for twin pair
proc twinprimes_ssoz() =
## Main routine to setup, time, and display results for twin primes sieve.
let x = stdin.readline.split # Inputs are 1 or 2 range values < 2**64
var end_num = max(x[0].parseUInt, 3'u)
var start_num = if x.len > 1: max(x[1].parseUInt, 3'u) else: 3'u
if start_num > end_num: swap start_num, end_num
start_num = start_num or 1 # if start_num even add 1
end_num = (end_num - 1) or 1 # if end_num even subtract 1
if end_num - start_num < 2: (start_num, end_num) = (7'u, 7'u)
echo("threads = ", countProcessors())
let ts = epochTime() # start timing sieve setup execution
# select Pn, set sieving params for inputs
let (modpg, res_0, Ks, Kmin, Kmax, Krange,
pairscnt, restwins, resinvrs) = setSieveParameters(start_num, end_num)
# create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range
let primes: seq[int] = if end_num < 49: @[5]
else: sozp5(int(sqrt float64(end_num)), res_0, start_num, end_num)
echo("each of ", pairscnt, " threads has nextp[", 2, " x ", primes.len, "] array")
var twinscnt = 0'u64 # init twinprimes range count
let lo_range = uint(restwins[0] - 3) # lo_range = lo_tp - 1
for tp in [3, 5, 11, 17]: # excluded low tp values for PGs used
if end_num == 3'u: break # if 3 end of range, no twin primes
if tp.uint in start_num..lo_range: twinscnt += 1 # cnt small tps if any
let te = epochTime() - ts # sieve setup time
echo("setup time = ", te.formatFloat(ffDecimal, 6), " secs")
echo("perform twinprimes ssoz sieve")
let t1 = epochTime() # start timing ssoz sieve execution
cnts = newSeq[uint](pairscnt) # array to hold count of tps for each thread
lastwins = newSeq[uint](pairscnt) # array to hold largest tp for each thread
#parallel: # perform in parallel
for indx, r_hi in restwins: # for each twin pair row index
spawn twins_sieve(r_hi.uint, Kmin, Kmax, Ks, start_num, end_num, modpg, primes, resinvrs, indx)
stdout.write("\r", (indx + 1), " of ", pairscnt, " twinpairs done")
sync() # when all the threads finish
var last_twin = 0'u # find largest twin prime in range
for i in 0 ..< pairscnt: # and determine total twin primes count
twinscnt += cnts[i]
if last_twin < lastwins[i]: last_twin = lastwins[i]
if end_num == 5'u and twinscnt == 1: last_twin = 5'u
var Kn = Krange mod Ks # set number of resgroups in last slice
if Kn == 0: Kn = Ks # if multiple of seg size set to seg size
let t2 = epochTime() - t1 # sieve execution time
echo("\nsieve time = ", t2.formatFloat(ffDecimal, 3), " secs")
echo("total time = ", (t2 + te).formatFloat(ffDecimal, 3), " secs")
echo("last segment = ", Kn, " resgroups; segment slices = ", (Krange - 1) div Ks + 1)
echo("total twins = ", twinscnt, "; last twin = ", last_twin - 1, "+/-1")
twinprimes_ssoz()
@Pascal66
Copy link

Pascal66 commented Dec 16, 2019

This doesnt work in Nim:
nim-1.0.4>twinprimes_ssoz 500000000000 600000000000
strutils.nim(1118) parseUInt
Error: unhandled exception: invalid unsigned integer: [ValueError]

This is working in Java:
Please enter an range of integer (comma or space separated):
5e11 6e11
Max threads = 8
generating parameters for P 13
each thread segment is [1 x 131072] bytes array
twinprime candidates = 4945055940; resgroups = 3330004
each 1485 threads has nextp[2 x 62068] array
setup time = 0.119 secs
perform twinprimes ssoz sieve with s=0
1485 of 1485 threads done
sieve time = 23.375 secs
last segment = 53204 resgroups; segment slices = 26
total twins = 328566078; last twin = 599999999772+/-1
total time = 23.494 secs

@jzakiya
Copy link
Author

jzakiya commented Dec 16, 2019 via email

@Pascal66
Copy link

Pascal66 commented Dec 17, 2019 via email

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment