Last active
August 12, 2024 16:53
-
-
Save jzakiya/0ea756a8f6fd09f56cd9374d0dcf4197 to your computer and use it in GitHub Desktop.
Cousin Primes generator, multi-threaded, using SSoZ (Segmented Sieve of Zakiya), written in Go
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This Go source file is a multiple threaded implementation to perform an | |
// extremely fast Segmented Sieve of Zakiya (SSoZ) to find Cousin Primes <= N. | |
// Inputs are single values N, or ranges N1 and N2, of 64-bits, 0 -- 2^64 - 1. | |
// Output is the number of cousiin primes <= N, or in range N1 to N2; the last | |
// cousin prime value for the range; and the total time of execution. | |
// Code originally developed on a System76 laptop with an Intel I7 6700HQ cpu, | |
// 2.6-3.5 GHz clock, with 8 threads, and 16GB of memory. Parameter tuning | |
// probably needed to optimize for other hardware systems (ARM, PowerPC, etc). | |
// Compile as: $ go build cousinprimes_ssoz.go | |
// To reduce binary size do: $ strip cousinprimes_ssoz | |
// Single val: $ echo val | ./cousinprimes_ssoz | |
// Range vals: $ echo val1 val2 | ./cousinprimes_ssoz | |
// Mathematical and technical basis for implementation are explained here: | |
// https://www.academia.edu/37952623/The_Use_of_Prime_Generators_to_Implement_Fast_ | |
// Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_ | |
// for_the_Riemann_Hypotheses | |
// https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ_ | |
// https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
// https://www.academia.edu/81206391/Twin_Primes_Segmented_Sieve_of_Zakiya_SSoZ_Explained | |
// This source code, and its updates, can be found here: | |
// https://gist.github.com/jzakiya/0ea756a8f6fd09f56cd9374d0dcf4197 | |
// This code is provided free and subject to copyright and terms of the | |
// GNU General Public License Version 3, GPLv3, or greater. | |
// License copy/terms are here: http://www.gnu.org/licenses/ | |
// Copyright (c) 2017-2024; Jabari Zakiya -- jzakiya at gmail dot com | |
// Last update: 2024/08/12 | |
package main | |
import ( | |
"fmt" | |
"time" | |
"sort" | |
"sync" | |
"math" | |
"math/bits" | |
"runtime" | |
"sync/atomic" | |
) | |
// Customized gcd to determine coprimality; n > m; m odd | |
func coprime(m, n int) bool { | |
for m | 1 != 1 { m, n = n % m, m } | |
return (m > 0) | |
} | |
// Compute modular inverse a^-1 to base m, e.g. a*(a^-1) mod m = 1 | |
func modinv(a0, m0 int) int { | |
if m0 == 1 { return 1 } | |
a, m := a0, m0 | |
x0, inv := 0, 1 | |
for a > 1 { | |
inv -= (a / m) * x0 | |
a, m = m, a % m | |
x0, inv = inv, x0 | |
} | |
if inv < 0 { inv += m0 } | |
return inv | |
} | |
func gen_pg_parameters(prime int) (uint, uint, int, []int, []int) { | |
// Create prime generator parameters for given Pn | |
fmt.Printf("using Prime Generator parameters for P%d\n", prime) | |
primes := []int{2, 3, 5, 7, 11, 13, 17, 19, 23} | |
modpg, res_0 := 1, 0 // compute Pn's modulus and res_0 value | |
for _, prm := range primes { res_0 = prm; if prm > prime { break }; modpg *= prm } | |
rescousins := []int{} // save upper cousinpair residues here | |
inverses := make([]int, modpg + 2) // save Pn's residues inverses here | |
rc, inc, res := 5, 2, 0 // use P3's PGS to generate residue candidates | |
midmodpg := modpg >> 1 // mid value of modpg | |
for rc < midmodpg { // find PG's 1st half residues | |
if coprime(rc, modpg) { // if rc is a residue | |
var mc = modpg - rc // create its modular complement | |
inverses[rc] = modinv(rc, modpg) // save rc and mc inverses | |
inverses[mc] = modinv(mc, modpg) // if in cousinspair save both hi residues | |
if res + 4 == rc { rescousins = append(rescousins, rc, mc + 4) } | |
res = rc // save current found residue | |
} | |
rc += inc; inc ^= 0b110 // create next P3 sequence rc: 5 7 11 13 17 19 ... | |
} | |
rescousins = append(rescousins, midmodpg + 2); sort.Ints(rescousins) | |
inverses[modpg + 1] = 1; inverses[modpg - 1] = modpg - 1 // last 2 residues are self inverses | |
return uint(modpg), uint(res_0), len(rescousins) , rescousins, inverses | |
} | |
func set_sieve_parameters(start_num, end_num uint) (uint, uint, uint, uint, uint, uint, int, []int, []int) { | |
// Select at runtime best PG and segment size parameters for input values. | |
// These are good estimates derived from PG data profiling. Can be improved. | |
trange := end_num - start_num | |
var bn, pg = 0, 3 | |
if end_num < 49 { | |
bn, pg = 1, 3 | |
} else if trange < 77_000_000 { | |
bn, pg = 16, 5 | |
} else if trange < 1_100_000_000 { | |
bn, pg = 32, 7 | |
} else if trange < 35_500_000_000 { | |
bn, pg = 64, 11 | |
} else if trange < 14_000_000_000_000 { | |
pg = 13 | |
if trange > 7_000_000_000_000 { bn = 384 | |
} else if trange > 2_500_000_000_000 { bn = 320 | |
} else if trange > 250_000_000_000 { bn = 196 | |
} else { bn = 128 } | |
} else { | |
bn, pg = 384, 17 | |
} | |
modpg, res_0, pairscnt, rescousins, resinvrs := gen_pg_parameters(pg) | |
kmin := (start_num-2) / modpg + 1 // number of resgroups to start_num | |
kmax := (end_num - 2) / modpg + 1 // number of resgroups to end_num | |
krange := kmax - kmin + 1 // number of resgroups in range, at least 1 | |
var ks, n = uint(0), 0 | |
if krange < 37_500_000_000_000 { n = 10 } else if krange < 975_000_000_000_000 { n = 16 } else { n = 20} | |
b := uint(bn * 1024 * n) // set seg size to optimize for selected PG | |
if krange < b { ks = krange } else { ks = b } // segments resgroups size | |
fmt.Printf("segment size = %d resgroups; seg array is [1 x %d] 64-bits\n", ks, (((ks-1) >> 6) + 1)) | |
maxpairs := krange * uint(pairscnt) // maximum number of cousinprime pcs | |
fmt.Printf("cousinprime candidates = %d; resgroups = %d\n", maxpairs, krange) | |
return modpg, res_0, ks, kmin, kmax, krange, pairscnt, rescousins, resinvrs | |
} | |
func sozp5(val, res_0, start_num, end_num uint) []uint { | |
// Return the primes r0..sqrt(end_num) within range (start_num...end_num) | |
md, rescnt := 30, 8 // P5's modulus and residues count | |
res := []int{7,11,13,17,19,23,29,31} // P5's residues | |
bitn := []int{0,0,0,0,0,1,0,0,0,2,0,4,0,0,0,8,0,16,0,0,0,32,0,0,0,0,0,64,0,128} | |
range_size := end_num - start_num // integers size of inputs range | |
kmax := (val - 2) / uint(md) + 1 // number of resgroups upto input value | |
prms := make([]uint8, kmax) // byte array of prime candidates, init '0' | |
sqrtn := int(math.Sqrt(float64(val))) // compute integer sqrt of val | |
k := sqrtn/rescnt; resk := sqrtn-md*k; var r = 0 // sqrtn resgroup|residue values, 1st res posn | |
for resk >= res[r] { r += 1 } // find largest residue <= sqrtn posn in its resgroup | |
pcs_to_sqrtn := k*rescnt + r // number of pcs <= sqrtn | |
for i := 0; i < pcs_to_sqrtn; i++ { // for r0..sqrtn primes mark their multiples | |
k := i/rescnt; r := i%rescnt // update resgroup parameters | |
if (prms[k] & (1 << r)) != 0 { continue } // skip pc if not prime | |
prm_r := res[r] // if prime save its residue value | |
prime := md*k + prm_r // numerate its value | |
rem := start_num % uint(prime) // prime's modular distance to start_num | |
if !(uint(prime) - rem <= range_size || rem == 0) { continue } // skip prime if no multiple in range | |
for _, ri := range res { // mark prime's multiples in prms | |
prod := prm_r * ri - 2 // compute cross-product for prm_r|ri pair | |
bit_r := uint8(bitn[prod % md]) // bit mask value for prod's residue | |
var kpm = k * (prime + ri) + prod / md // resgroup for prime's 1st multiple | |
for uint(kpm) < kmax { prms[kpm] |= bit_r; kpm += prime } // mark primes's multiples | |
} } | |
// prms now contains the prime multiples positions marked for the pcs r0..N | |
// now along each restrack, identify|numerate the primes in each resgroup | |
// return only the primes with a multiple within range (start_num...end_num) | |
primes := []uint{} // create empty dynamic array for primes | |
for i, r_i := range res { // along each restrack|row til end | |
for k, resgroup := range prms { // for each resgroup along restrack | |
if resgroup & (1 << i) == 0 { // if bit location a prime | |
prime := uint(md * k + r_i) // numerate its value, store if in range | |
// check if prime has multiple in range, if so keep it, if not don't | |
rem := start_num % prime // if rem is 0 then start_num multiple of prime | |
if (res_0 <= prime && prime <= val) && (prime - rem <= range_size || rem == 0) { primes = append(primes, prime) } | |
} } } | |
return primes // primes stored in restrack|row sequence order | |
} | |
func nextp_init(rhi, kmin, modpg uint, primes []uint, resinvrs []int) []uint { | |
// Initialize 'nextp' array for cousinpair upper residue rhi in 'rescousins'. | |
// Compute 1st prime multiple resgroups for each prime r0..sqrt(N) and | |
// store consecutively as lo_cp|hi_cp pairs for their restracks. | |
nextp := make([]uint,len(primes)*2) // 1st mults array for cousinpair | |
r_hi, r_lo := rhi, rhi - 4 // upper|lower cousinpair residue values | |
for j, prime := range primes { // for each prime r0..sqrt(N) | |
k := (prime - 2) / modpg // find the resgroup it's in | |
r := (prime - 2) % modpg + 2 // and its residue value | |
r_inv := uint(resinvrs[r]) // and residue inverse | |
rl := (r_lo * r_inv - 2) % modpg + 2 // compute r's ri for r_lo | |
rh := (r_hi * r_inv - 2) % modpg + 2 // compute r's ri for r_hi | |
kl := k * (prime + rl) + (r * rl - 2) / modpg // kl 1st mult resgroup | |
kh := k * (prime + rh) + (r * rh - 2) / modpg // kh 1st mult resgroup | |
if kl < kmin { kl = (kmin - kl) % prime; if kl > 0 { kl = prime - kl } } else { kl -= kmin } | |
if kh < kmin { kh = (kmin - kh) % prime; if kh > 0 { kh = prime - kh } } else { kh -= kmin } | |
nextp[j * 2] = uint(kl) // prime's 1st mult lo_cp resgroup val in range | |
nextp[j * 2 | 1] = uint(kh) // prime's 1st mult hi_cp resgroup val in range | |
} | |
return nextp | |
} | |
func cousins_sieve(rhi int, kmin, kmax, ks, start_num, end_num, modpg uint, primes []uint, resinvrs []int) (uint, uint) { | |
// Perform in thread the ssoz for given cousinpair residues for kmax resgroups. | |
// First create|init 'nextp' array of 1st prime mults for given cousinpair, | |
// stored consequtively in 'nextp', and init seg array for ks resgroups. | |
// For sieve, mark resgroup bits to '1' if either cousinpair restrack is nonprime | |
// for primes mults resgroups, and update 'nextp' restrack slices acccordingly. | |
// Return the last cousinprime|sum for the range for this cousinpair residues. | |
s := 6 // shift value for 64 bits | |
bmask := uint((1 << s) - 1) // bitmask val for 64 bits | |
sum, ki, kn, r_hi := 0, kmin-1, ks, uint(rhi) // init these parameters | |
hi_cp, k_max := uint(0), kmax // max cousinprime|resgroup | |
seg := make([]uint64, ((ks - 1) >> s) + 1) // seg array of ks resgroups | |
if r_hi - 4 < (start_num - 2) % modpg + 2 {ki += 1} // ensure lo cp in range | |
if r_hi > (end_num - 2) % modpg + 2 {k_max -= 1} // ensure hi cp in range | |
nextp := nextp_init(r_hi, ki, modpg, primes, resinvrs) // init nextp array | |
for ki < k_max { // sieve ks size slices upto kmax | |
if ks > k_max - ki {kn = k_max - ki} // adjust kn size for last seg | |
for j, prime := range primes { // for each prime r0..sqrt(N) | |
// for lower residue track | |
var k1 uint = nextp[j * 2] // starting from this resgroup in seg | |
for k1 < kn { // mark primenth resgroup bits prime mults | |
seg[k1 >> s] |= uint64(1) << (k1 & bmask) | |
k1 += prime } // set resgroup for prime's next multiple | |
nextp[j * 2] = k1 - kn // save 1st resgroup in next eligible seg | |
// for upper residue track | |
var k2 uint = nextp[j * 2 | 1] // starting from this resgroup in seg | |
for k2 < kn { // mark primenth resgroup bits prime mults | |
seg[k2 >> s] |= uint64(1) << (k2 & bmask) | |
k2 += prime } // set resgroup for prime's next multiple | |
nextp[j * 2 | 1] = k2 - kn // save 1st resgroup in next eligible seg | |
} // set as nonprime unused bits in last seg[n] | |
// so fast, do for every seg[i] | |
seg[(kn - 1) >> s] |= ^uint64(1) << ((kn - 1) & bmask) | |
cnt := 0 // count the cousinprimes in the segment | |
for i := 0; i <= int(kn - 1) >> s; i++ { cnt += bits.OnesCount64(^seg[i]) } | |
if cnt > 0 { // if segment has cousinprimes | |
sum += cnt // add the segment count to total count | |
var upk = kn - 1 // from end of seg count back to largest tp | |
for seg[upk >> s] & (uint64(1) << (upk & bmask)) != 0 { upk -= 1 } | |
hi_cp = upk + ki // set its full range resgroup value | |
} | |
ki += ks // set 1st resgroup val of next seg slice | |
if ki < k_max { for i := range seg { seg[i] = 0 } } // set seg bits to all primes | |
} // when sieve done, numerate largest cousin in range | |
// for small ranges w/o cousins set largest to 0 | |
if r_hi > end_num || sum == 0 { hi_cp = 0 } else { hi_cp = hi_cp * modpg + r_hi } | |
return hi_cp, uint(sum) // return largest cousinprime|cousins count in range | |
} | |
func main() { | |
var start_num, end_num = uint(3), uint(3) | |
_, err := fmt.Scan(&end_num, &start_num) | |
if end_num < 3 { end_num = 3 } | |
if err != nil { start_num = 3 } else { if start_num < 3 { start_num = 3 } } | |
if start_num > end_num { start_num, end_num = end_num, start_num } | |
start_num |= 1 // if start_num even increase by 1 | |
end_num = (end_num - 1) | 1 // if end_num even decrease by 1 | |
if end_num - start_num < 4 { start_num, end_num = 7, 7 } | |
fmt.Printf("threads = %d\n", runtime.NumCPU()) | |
ts := time.Now() // start timing sieve setup execution | |
// select Pn, set sieving params for inputs | |
modpg, res_0, ks, kmin, kmax, krange, pairscnt, rescousins, resinvrs := set_sieve_parameters(start_num, end_num) | |
// create sieve primes <= sqrt(end_num), only use those whose multiples within inputs range | |
var primes = []uint{} | |
if end_num < 49 { primes = []uint{5} | |
} else { primes = sozp5(uint(math.Sqrt(float64(end_num))), res_0, start_num, end_num) } | |
fmt.Printf("each of %d threads has nextp[2 x %d] array\n", pairscnt, len(primes)) | |
te := time.Now() | |
fmt.Printf("setup time = %v \n", te.Sub(ts)) // display sieve setup time | |
fmt.Println("perform cousinprimes ssoz sieve") | |
t1 := time.Now() // start timing ssoz sieve execution | |
cnts := make([]uint, pairscnt) // cousins count for each cousinpair | |
lastcousins := make([]uint, pairscnt) // last|largest cousin per cousinpair | |
var threadscnt uint64; // count of finished threads | |
var wg sync.WaitGroup | |
for i, r_hi := range rescousins { // sieve cousinpair restracks | |
wg.Add(1) | |
go func(i, r_hi int) { | |
defer wg.Done() | |
l, c := cousins_sieve(r_hi, kmin, kmax, ks, start_num, end_num, modpg, primes, resinvrs) | |
lastcousins[i] = l; cnts[i] = c | |
fmt.Printf("\r%d of %d cousinpairs done", atomic.AddUint64(&threadscnt, 1), pairscnt) | |
}(i, r_hi) | |
} | |
wg.Wait() | |
fmt.Printf("\r%d of %d cousinpairs done", pairscnt, pairscnt) | |
cousinscnt := uint(0) // number of cousin primes in range | |
last_cousin := uint(0) // largest hi_cp cousin in range | |
if end_num < 49 { // check for cousins in low ranges | |
for _, c_hi := range []uint{11, 17, 23, 41, 47} { | |
if start_num <= c_hi - uint(4) && c_hi <= end_num { last_cousin = c_hi; cousinscnt += 1 } | |
} | |
} else { // check for cousins from sieve | |
for i := 0; i < pairscnt; i++ { // find largest cousinprime|count in range | |
cousinscnt += cnts[i] | |
if last_cousin < lastcousins[i] { last_cousin = lastcousins[i] } | |
} } | |
// account for 1st cousin prime, defined as (3, 7) | |
if start_num == 3 && end_num > 6 { cousinscnt += 1; if end_num < 11 { last_cousin = 7 }} | |
kn := krange % ks // set number of resgroups in last slice | |
if kn == 0 { kn = ks } // if multiple of seg size set to seg size | |
t2 := time.Now() // sieve execution time | |
fmt.Printf("\nsieve time = %v", t2.Sub(t1)) // ssoz sieve time | |
fmt.Printf("\ntotal time = %v", t2.Sub(ts)) // setup + sieve time | |
fmt.Printf("\nlast segment = %d resgroups; segment slices = %d\n", kn, (krange - 1)/ks + 1) | |
fmt.Printf("total cousins = %d; last cousin = %d|-4", cousinscnt, last_cousin) | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment