Skip to content

Instantly share code, notes, and snippets.

@jzakiya
Last active August 12, 2024 16:53
Show Gist options
  • Save jzakiya/0ea756a8f6fd09f56cd9374d0dcf4197 to your computer and use it in GitHub Desktop.
Save jzakiya/0ea756a8f6fd09f56cd9374d0dcf4197 to your computer and use it in GitHub Desktop.
Cousin Primes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Go
// This Go source file is a multiple threaded implementation to perform an
// extremely fast Segmented Sieve of Zakiya (SSoZ) to find Cousin Primes <= N.
// Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1.
// Output is the number of cousiin primes <= N, or in range N1 to N2; the last
// cousin prime value for the range; and the total time of execution.
// Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu,
// 2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning
// probably needed to optimize for other hardware systems (ARM, PowerPC, etc).
// Compile as: $ go build cousinprimes_ssoz.go
// To reduce binary size do: $ strip cousinprimes_ssoz
// Single val: $ echo val | ./cousinprimes_ssoz
// Range vals: $ echo val1 val2 | ./cousinprimes_ssoz
// Mathematical and technical basis for implementation are explained here:
// https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_
// Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_
// for_the_Riemann_Hypotheses
// https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_
// https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK
// https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained
// This source code, and its updates, can be found here:
// https://gist.github.com/jzakiya/0ea756a8f6fd09f56cd9374d0dcf4197
// This code is provided free and subject to copyright and terms of the
// GNU General Public License Version 3, GPLv3, or greater.
// License copy/terms are here: http://www.gnu.org/licenses/
// Copyright (c) 2017-2024; Jabari Zakiya -- jzakiya at gmail dot com
// Last update: 2024/08/12
package main
import (
"fmt"
"time"
"sort"
"sync"
"math"
"math/bits"
"runtime"
"sync/atomic"
)
// Customized gcd to determine coprimality; n > m; m odd
func coprime(m, n int) bool {
for m | 1 != 1 { m, n = n % m, m }
return (m > 0)
}
// Compute modular inverse a^-1 to base m, e.g. a*(a^-1) mod m = 1
func modinv(a0, m0 int) int {
if m0 == 1 { return 1 }
a, m := a0, m0
x0, inv := 0, 1
for a > 1 {
inv -= (a / m) * x0
a, m = m, a % m
x0, inv = inv, x0
}
if inv < 0 { inv += m0 }
return inv
}
func gen_pg_parameters(prime int) (uint, uint, int, []int, []int) {
// Create prime generator parameters for given Pn
fmt.Printf("using Prime Generator parameters for P%d\n", prime)
primes := []int{2, 3, 5, 7, 11, 13, 17, 19, 23}
modpg, res_0 := 1, 0 // compute Pn's modulus and res_0 value
for _, prm := range primes { res_0 = prm; if prm > prime { break }; modpg *= prm }
rescousins := []int{} // save upper cousinpair residues here
inverses := make([]int, modpg + 2) // save Pn's residues inverses here
rc, inc, res := 5, 2, 0 // use P3's PGS to generate residue candidates
midmodpg := modpg >> 1 // mid value of modpg
for rc < midmodpg { // find PG's 1st half residues
if coprime(rc, modpg) { // if rc is a residue
var mc = modpg - rc // create its modular complement
inverses[rc] = modinv(rc, modpg) // save rc and mc inverses
inverses[mc] = modinv(mc, modpg) // if in cousinspair save both hi residues
if res + 4 == rc { rescousins = append(rescousins, rc, mc + 4) }
res = rc // save current found residue
}
rc += inc; inc ^= 0b110 // create next P3 sequence rc: 5 7 11 13 17 19 ...
}
rescousins = append(rescousins, midmodpg + 2); sort.Ints(rescousins)
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 // last 2 residues are self inverses
return uint(modpg), uint(res_0), len(rescousins) , rescousins, inverses
}
func set_sieve_parameters(start_num, end_num uint) (uint, uint, uint, uint, uint, uint, int, []int, []int) {
// Select at runtime best PG and segment size parameters for input values.
// These are good estimates derived from PG data profiling. Can be improved.
trange := end_num - start_num
var bn, pg = 0, 3
if end_num < 49 {
bn, pg = 1, 3
} else if trange < 77_000_000 {
bn, pg = 16, 5
} else if trange < 1_100_000_000 {
bn, pg = 32, 7
} else if trange < 35_500_000_000 {
bn, pg = 64, 11
} else if trange < 14_000_000_000_000 {
pg = 13
if trange > 7_000_000_000_000 { bn = 384
} else if trange > 2_500_000_000_000 { bn = 320
} else if trange > 250_000_000_000 { bn = 196
} else { bn = 128 }
} else {
bn, pg = 384, 17
}
modpg, res_0, pairscnt, rescousins, resinvrs := gen_pg_parameters(pg)
kmin := (start_num-2) / modpg + 1 // number of resgroups to start_num
kmax := (end_num - 2) / modpg + 1 // number of resgroups to end_num
krange := kmax - kmin + 1 // number of resgroups in range, at least 1
var ks, n = uint(0), 0
if krange < 37_500_000_000_000 { n = 10 } else if krange < 975_000_000_000_000 { n = 16 } else { n = 20}
b := uint(bn * 1024 * n) // set seg size to optimize for selected PG
if krange < b { ks = krange } else { ks = b } // segments resgroups size
fmt.Printf("segment size = %d resgroups; seg array is [1 x %d] 64-bits\n", ks, (((ks-1) >> 6) + 1))
maxpairs := krange * uint(pairscnt) // maximum number of cousinprime pcs
fmt.Printf("cousinprime candidates = %d; resgroups = %d\n", maxpairs, krange)
return modpg, res_0, ks, kmin, kmax, krange, pairscnt, rescousins, resinvrs
}
func sozp5(val, res_0, start_num, end_num uint) []uint {
// Return the primes r0..sqrt(end_num) within range (start_num...end_num)
md, rescnt := 30, 8 // P5's modulus and residues count
res := []int{7,11,13,17,19,23,29,31} // P5's residues
bitn := []int{0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128}
range_size := end_num - start_num // integers size of inputs range
kmax := (val - 2) / uint(md) + 1 // number of resgroups upto input value
prms := make([]uint8, kmax) // byte array of prime candidates, init '0'
sqrtn := int(math.Sqrt(float64(val))) // compute integer sqrt of val
k := sqrtn/rescnt; resk := sqrtn-md*k; var r = 0 // sqrtn resgroup|residue values, 1st res posn
for resk >= res[r] { r += 1 } // find largest residue <= sqrtn posn in its resgroup
pcs_to_sqrtn := k*rescnt + r // number of pcs <= sqrtn
for i := 0; i < pcs_to_sqrtn; i++ { // for r0..sqrtn primes mark their multiples
k := i/rescnt; r := i%rescnt // update resgroup parameters
if (prms[k] & (1 << r)) != 0 { continue } // skip pc if not prime
prm_r := res[r] // if prime save its residue value
prime := md*k + prm_r // numerate its value
rem := start_num % uint(prime) // prime's modular distance to start_num
if !(uint(prime) - rem <= range_size || rem == 0) { continue } // skip prime if no multiple in range
for _, ri := range res { // mark prime's multiples in prms
prod := prm_r * ri - 2 // compute cross-product for prm_r|ri pair
bit_r := uint8(bitn[prod % md]) // bit mask value for prod's residue
var kpm = k * (prime + ri) + prod / md // resgroup for prime's 1st multiple
for uint(kpm) < kmax { prms[kpm] |= bit_r; kpm += prime } // mark primes's multiples
} }
// prms now contains the prime multiples positions marked for the pcs r0..N
// now along each restrack, identify|numerate the primes in each resgroup
// return only the primes with a multiple within range (start_num...end_num)
primes := []uint{} // create empty dynamic array for primes
for i, r_i := range res { // along each restrack|row til end
for k, resgroup := range prms { // for each resgroup along restrack
if resgroup & (1 << i) == 0 { // if bit location a prime
prime := uint(md * k + r_i) // numerate its value, store if in range
// check if prime has multiple in range, if so keep it, if not don't
rem := start_num % prime // if rem is 0 then start_num multiple of prime
if (res_0 <= prime && prime <= val) && (prime - rem <= range_size || rem == 0) { primes = append(primes, prime) }
} } }
return primes // primes stored in restrack|row sequence order
}
func nextp_init(rhi, kmin, modpg uint, primes []uint, resinvrs []int) []uint {
// Initialize 'nextp' array for cousinpair upper residue rhi in 'rescousins'.
// Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and
// store consecutively as lo_cp|hi_cp pairs for their restracks.
nextp := make([]uint,len(primes)*2) // 1st mults array for cousinpair
r_hi, r_lo := rhi, rhi - 4 // upper|lower cousinpair residue values
for j, prime := range primes { // for each prime r0..sqrt(N)
k := (prime - 2) / modpg // find the resgroup it's in
r := (prime - 2) % modpg + 2 // and its residue value
r_inv := uint(resinvrs[r]) // and residue inverse
rl := (r_lo * r_inv - 2) % modpg + 2 // compute r's ri for r_lo
rh := (r_hi * r_inv - 2) % modpg + 2 // compute r's ri for r_hi
kl := k * (prime + rl) + (r * rl - 2) / modpg // kl 1st mult resgroup
kh := k * (prime + rh) + (r * rh - 2) / modpg // kh 1st mult resgroup
if kl < kmin { kl = (kmin - kl) % prime; if kl > 0 { kl = prime - kl } } else { kl -= kmin }
if kh < kmin { kh = (kmin - kh) % prime; if kh > 0 { kh = prime - kh } } else { kh -= kmin }
nextp[j * 2] = uint(kl) // prime's 1st mult lo_cp resgroup val in range
nextp[j * 2 | 1] = uint(kh) // prime's 1st mult hi_cp resgroup val in range
}
return nextp
}
func cousins_sieve(rhi int, kmin, kmax, ks, start_num, end_num, modpg uint, primes []uint, resinvrs []int) (uint, uint) {
// Perform in thread the ssoz for given cousinpair residues for kmax resgroups.
// First create|init 'nextp' array of 1st prime mults for given cousinpair,
// stored consequtively in 'nextp', and init seg array for ks resgroups.
// For sieve, mark resgroup bits to '1' if either cousinpair restrack is nonprime
// for primes mults resgroups, and update 'nextp' restrack slices acccordingly.
// Return the last cousinprime|sum for the range for this cousinpair residues.
s := 6 // shift value for 64 bits
bmask := uint((1 << s) - 1) // bitmask val for 64 bits
sum, ki, kn, r_hi := 0, kmin-1, ks, uint(rhi) // init these parameters
hi_cp, k_max := uint(0), kmax // max cousinprime|resgroup
seg := make([]uint64, ((ks - 1) >> s) + 1) // seg array of ks resgroups
if r_hi - 4 < (start_num - 2) % modpg + 2 {ki += 1} // ensure lo cp in range
if r_hi > (end_num - 2) % modpg + 2 {k_max -= 1} // ensure hi cp in range
nextp := nextp_init(r_hi, ki, modpg, primes, resinvrs) // init nextp array
for ki < k_max { // sieve ks size slices upto kmax
if ks > k_max - ki {kn = k_max - ki} // adjust kn size for last seg
for j, prime := range primes { // for each prime r0..sqrt(N)
// for lower residue track
var k1 uint = nextp[j * 2] // starting from this resgroup in seg
for k1 < kn { // mark primenth resgroup bits prime mults
seg[k1 >> s] |= uint64(1) << (k1 & bmask)
k1 += prime } // set resgroup for prime's next multiple
nextp[j * 2] = k1 - kn // save 1st resgroup in next eligible seg
// for upper residue track
var k2 uint = nextp[j * 2 | 1] // starting from this resgroup in seg
for k2 < kn { // mark primenth resgroup bits prime mults
seg[k2 >> s] |= uint64(1) << (k2 & bmask)
k2 += prime } // set resgroup for prime's next multiple
nextp[j * 2 | 1] = k2 - kn // save 1st resgroup in next eligible seg
} // set as nonprime unused bits in last seg[n]
// so fast, do for every seg[i]
seg[(kn - 1) >> s] |= ^uint64(1) << ((kn - 1) & bmask)
cnt := 0 // count the cousinprimes in the segment
for i := 0; i <= int(kn - 1) >> s; i++ { cnt += bits.OnesCount64(^seg[i]) }
if cnt > 0 { // if segment has cousinprimes
sum += cnt // add the segment count to total count
var upk = kn - 1 // from end of seg count back to largest tp
for seg[upk >> s] & (uint64(1) << (upk & bmask)) != 0 { upk -= 1 }
hi_cp = upk + ki // set its full range resgroup value
}
ki += ks // set 1st resgroup val of next seg slice
if ki < k_max { for i := range seg { seg[i] = 0 } } // set seg bits to all primes
} // when sieve done, numerate largest cousin in range
// for small ranges w/o cousins set largest to 0
if r_hi > end_num || sum == 0 { hi_cp = 0 } else { hi_cp = hi_cp * modpg + r_hi }
return hi_cp, uint(sum) // return largest cousinprime|cousins count in range
}
func main() {
var start_num, end_num = uint(3), uint(3)
_, err := fmt.Scan(&end_num, &start_num)
if end_num < 3 { end_num = 3 }
if err != nil { start_num = 3 } else { if start_num < 3 { start_num = 3 } }
if start_num > end_num { start_num, end_num = end_num, start_num }
start_num |= 1 // if start_num even increase by 1
end_num = (end_num - 1) | 1 // if end_num even decrease by 1
if end_num - start_num < 4 { start_num, end_num = 7, 7 }
fmt.Printf("threads = %d\n", runtime.NumCPU())
ts := time.Now() // start timing sieve setup execution
// select Pn, set sieving params for inputs
modpg, res_0, ks, kmin, kmax, krange, pairscnt, rescousins, resinvrs := set_sieve_parameters(start_num, end_num)
// create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range
var primes = []uint{}
if end_num < 49 { primes = []uint{5}
} else { primes = sozp5(uint(math.Sqrt(float64(end_num))), res_0, start_num, end_num) }
fmt.Printf("each of %d threads has nextp[2 x %d] array\n", pairscnt, len(primes))
te := time.Now()
fmt.Printf("setup time = %v \n", te.Sub(ts)) // display sieve setup time
fmt.Println("perform cousinprimes ssoz sieve")
t1 := time.Now() // start timing ssoz sieve execution
cnts := make([]uint, pairscnt) // cousins count for each cousinpair
lastcousins := make([]uint, pairscnt) // last|largest cousin per cousinpair
var threadscnt uint64; // count of finished threads
var wg sync.WaitGroup
for i, r_hi := range rescousins { // sieve cousinpair restracks
wg.Add(1)
go func(i, r_hi int) {
defer wg.Done()
l, c := cousins_sieve(r_hi, kmin, kmax, ks, start_num, end_num, modpg, primes, resinvrs)
lastcousins[i] = l; cnts[i] = c
fmt.Printf("\r%d of %d cousinpairs done", atomic.AddUint64(&threadscnt, 1), pairscnt)
}(i, r_hi)
}
wg.Wait()
fmt.Printf("\r%d of %d cousinpairs done", pairscnt, pairscnt)
cousinscnt := uint(0) // number of cousin primes in range
last_cousin := uint(0) // largest hi_cp cousin in range
if end_num < 49 { // check for cousins in low ranges
for _, c_hi := range []uint{11, 17, 23, 41, 47} {
if start_num <= c_hi - uint(4) && c_hi <= end_num { last_cousin = c_hi; cousinscnt += 1 }
}
} else { // check for cousins from sieve
for i := 0; i < pairscnt; i++ { // find largest cousinprime|count in range
cousinscnt += cnts[i]
if last_cousin < lastcousins[i] { last_cousin = lastcousins[i] }
} }
// account for 1st cousin prime, defined as (3, 7)
if start_num == 3 && end_num > 6 { cousinscnt += 1; if end_num < 11 { last_cousin = 7 }}
kn := krange % ks // set number of resgroups in last slice
if kn == 0 { kn = ks } // if multiple of seg size set to seg size
t2 := time.Now() // sieve execution time
fmt.Printf("\nsieve time = %v", t2.Sub(t1)) // ssoz sieve time
fmt.Printf("\ntotal time = %v", t2.Sub(ts)) // setup + sieve time
fmt.Printf("\nlast segment = %d resgroups; segment slices = %d\n", kn, (krange - 1)/ks + 1)
fmt.Printf("total cousins = %d; last cousin = %d|-4", cousinscnt, last_cousin)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment