Last active
February 4, 2020 15:02
-
-
Save Pascal66/870c715529339734f5cfdd312bd82ae6 to your computer and use it in GitHub Desktop.
Fast Segmented Twin Primes SieveopenCL (Aparapi)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package fr.cridp.sieve; | |
import com.aparapi.Kernel; | |
import com.aparapi.ProfileInfo; | |
import com.aparapi.Range; | |
import com.aparapi.internal.kernel.KernelManager; | |
import java.math.BigInteger; | |
import java.util.ArrayDeque; | |
import java.util.Arrays; | |
import java.util.Comparator; | |
import java.util.LinkedList; | |
import java.util.List; | |
import java.util.Scanner; | |
import java.util.stream.IntStream; | |
import static java.math.BigInteger.ONE; | |
import static java.math.BigInteger.ZERO; | |
/** | |
* This Java source file is a multiple threaded implementation to perform an | |
* extremely fast Segmented Sieve of Zakiya (SSoZ) to find Twin Primes <= N. | |
* <p> | |
* Inputs are single values N, of 64-bits, 0 -- 2^64 - 1. | |
* Output is the number of twin primes <= N; the last | |
* twin prime value for the range; and the total time of execution. | |
* <p> | |
* Run as Java application, and enter a range (comma or space spareted) when asked in console. | |
* <p> | |
* This java project, and updates, will be available here: | |
* https://github.com/Pascal66/TwinsPrimesSieve | |
* <p> | |
* <p> | |
* Example : | |
* Please enter an range of integer (comma or space separated): | |
* 0 1e11 | |
* Max threads = 8 | |
* Using Prime Generator parameters for given Pn 13 | |
* segment size = 524288 resgroups; seg array is [1 x 8192] 64-bits | |
* twinprime candidates = 4945055940 ; resgroups = 3330004 | |
* each 1485 threads has nextp[2 x 27287] array | |
* setup time = 0.124 secs | |
* perform twinprimes ssoz sieve with s=6 | |
* sieve time = 11.17 secs | |
* last segment = 184276 resgroups; segment slices = 7 | |
* total twins = 224376048; last twin = 99999999762+/-1 | |
* total time = 11.294 secs | |
* <p> | |
* Please enter an range of integer (comma or space separated): | |
* 0 1e12 | |
* Max threads = 8 | |
* Using Prime Generator parameters for given Pn 13 | |
* segment size = 802816 resgroups; seg array is [1 x 12544] 64-bits | |
* twinprime candidates = 49450550490 ; resgroups = 33300034 | |
* each 1485 threads has nextp[2 x 78492] array | |
* setup time = 0.125 secs | |
* perform twinprimes ssoz sieve with s=6 | |
* sieve time = 154.168 secs | |
* last segment = 384578 resgroups; segment slices = 42 | |
* total twins = 1870585220; last twin = 999999999960+/-1 | |
* total time = 154.293 secs | |
* <p> | |
* Original nim source file, and updates, available here: | |
* https://gist.github.com/jzakiya/6c7e1868bd749a6b1add62e3e3b2341e | |
* Original d source file, and updates, available here: | |
* https://gist.github.com/jzakiya/ae93bfa03dbc8b25ccc7f97ff8ad0f61 | |
* Original rust source file, and updates, available here: | |
* https://gist.github.com/jzakiya/b96b0b70cf377dfd8feb3f35eb437225 | |
* <p> | |
* Mathematical and technical basis for implementation are explained here: | |
* https://www.academia.edu/37952623The_Use_of_Prime_Generators_to_Implement_Fast_Twin_Primes_Sieve_of_Zakiya_SoZ_Applications_to_Number_Theory_and_Implications_for_the_Riemann_Hypotheses | |
* https://www.academia.edu/7583194/The_Segmented_Sieve_of_Zakiya_SSoZ | |
* https://www.academia.edu/19786419/PRIMES-UTILS_HANDBOOK | |
* <p> | |
* This code is provided free and subject to copyright and terms of the | |
* GNU General Public License Version 3, GPLv3, or greater. | |
* License copy/terms are here: http://www.gnu.org/licenses/ | |
* <p> | |
* Copyright (c) 2017-20 Jabari Zakiya -- jzakiya at gmail dot com | |
* Java version 0.21.CL - Pascal Pechard -- pascal at priveyes dot net | |
* Version Date: 2020/01/20 | |
* Attention ! Range 0 1E6 crash | |
*/ | |
public class SSOZJCL2 { | |
static final BigInteger TWO = ONE.add(ONE); | |
static final BigInteger THREE = TWO.add(ONE); | |
static int segmentSize = 0; // segment size for each seg restrack | |
static BigInteger start_num; // lo number for range | |
static BigInteger end_num; // hi number for range | |
static BigInteger Kmax = ZERO; // number of resgroups to end_num | |
static BigInteger Kmin; // number of resgroups to start_num | |
static ArrayDeque<Integer> primes; // list of primes r1..sqrt(N) | |
// static long[] cnts; // hold twin primes counts for seg bytes | |
// static long[] lastwins; // holds largest twin prime <= num in each thread | |
static BigInteger PGmodulus; // PG's modulus value | |
static int firstResidue; // PG's first residue value | |
static LinkedList<Integer> restwins;// PG's list of twinpair residues | |
static int[] resinvrs; // PG's list of residues inverses | |
static int segmentSizeFactor; // segment size factor for PG and input number | |
static final int S = 5; // 0|3 for 1|8 resgroups/byte for 'small|large' ranges | |
static final int BMASK = (1 << S) - 1; | |
/** | |
* Create Prime Generator parameters for given Pn. | |
* Using BigInteger permit gcd and ModInverse | |
* For convenience, both version (Long and BigInteger are created) | |
* | |
* @param bestPrimeGenerator int | |
*/ | |
private static void genPGparameters(int bestPrimeGenerator) { | |
System.out.println("Using Prime Generator parameters for given Pn " + bestPrimeGenerator); | |
final int[] primes = {2, 3, 5, 7, 11, 13, 17, 19, 23}; | |
PGmodulus = ONE; | |
firstResidue = 0; | |
for (int prime : primes) { | |
firstResidue = prime; | |
if (prime > bestPrimeGenerator) break; | |
PGmodulus = PGmodulus.multiply(BigInteger.valueOf(firstResidue)); | |
} | |
LinkedList<Integer> restwins = new LinkedList<>(); // save upper twin pair residues here | |
int[] inverses = new int[PGmodulus.intValue() + 2]; // save PG's residues inverses here | |
BigInteger pc = THREE.add(TWO); | |
int inc = 2; | |
BigInteger res = ZERO; | |
// find a residue, then modular complement | |
while (pc.compareTo(PGmodulus.divide(TWO)) < 0) { | |
// if pc a residue | |
if (PGmodulus.gcd(pc).equals(ONE)) { | |
final BigInteger pc_mc = PGmodulus.subtract(pc); // create its modular complement | |
int inv_r = pc.modInverse(PGmodulus).intValue();// modinv(pc, modpg); // compute residues's inverse | |
inverses[pc.intValue()] = inv_r; // save its inverse | |
inverses[inv_r] = pc.intValue(); // save its inverse inverse | |
inv_r = pc_mc.modInverse(PGmodulus).intValue(); // compute residues's complement inverse | |
inverses[pc_mc.intValue()] = inv_r; // save its inverse | |
inverses[inv_r] = pc_mc.intValue(); // save its inverse inverse | |
if (res.add(TWO).equals(pc)) { | |
restwins.add(pc.intValueExact()); | |
restwins.add(pc_mc.add(TWO).intValueExact()); | |
} // save hi_tp residues | |
res = pc; | |
} | |
pc = BigInteger.valueOf(pc.longValue() + inc); | |
inc ^= 0b110; | |
} | |
restwins.sort(Comparator.naturalOrder()); | |
restwins.add(PGmodulus.add(ONE).intValueExact()); // last residue is last hi_tp | |
inverses[PGmodulus.add(ONE).intValue()] = ONE.intValue(); | |
inverses[PGmodulus.subtract(ONE).intValue()] = PGmodulus.subtract(ONE).intValue(); // last 2 residues are self inverses | |
new PGparam(PGmodulus, firstResidue, restwins, inverses); | |
} | |
static class PGparam { | |
public static int modulus; | |
public static int Lkmin; | |
public static int Lkmax; | |
public static int Lstart; | |
public static int Lend; | |
public static int primesSize; | |
public static int segByteSize = 0; | |
public static int Lrange; | |
public PGparam(BigInteger modpg, int res_0, LinkedList<Integer> restwins, int[] inverses) { | |
SSOZJCL2.PGmodulus = modpg; | |
SSOZJCL2.firstResidue = res_0; | |
SSOZJCL2.restwins = restwins; | |
SSOZJCL2.resinvrs = inverses; | |
modulus = modpg.intValueExact(); | |
Kmin = start_num.subtract(TWO).divide(modpg).add(ONE); // number of resgroups to start_num | |
Kmax = end_num.subtract(TWO).divide(modpg).add(ONE); // number of resgroups to end_num | |
Lkmin = Kmin.intValueExact(); | |
Lkmax = Kmax.intValueExact(); | |
// number of range resgroups, at least 1 | |
Lrange = Lkmax - Lkmin + 1; | |
int n = 4; //Lrange < 37_500_000_000_000L ? 4 : Lrange < 975_000_000_000_000L ? 6 : 8; | |
// set seg size to optimize for selected PG | |
int B = (segmentSizeFactor * 1024 * n); | |
// segments resgroups size | |
segmentSize = Math.min(Lrange, B); | |
segByteSize = (int) ((segmentSize - 1 >>> S) + 1); | |
System.out.println("segment size = " + segmentSize + " resgroups; seg array is [1 x " + segByteSize + "] "+(BMASK+1L)+"-bits"); | |
int maxpairs = Lrange * restwins.size(); // maximum number of twinprime pcs | |
System.out.println("twinprime candidates = " + maxpairs + " ; resgroups = " + Lrange); | |
} | |
} | |
/** | |
* Select at runtime best PG and segment size factor to use for input value. | |
* These are good estimates derived from PG data profiling. Can be improved. | |
*/ | |
static void setSieveParameters(BigInteger start_range, BigInteger end_range) { | |
final BigInteger range = end_range.subtract(start_range); | |
int bestPG = 3; | |
if (end_range.compareTo(BigInteger.valueOf(49L)) < 0) { | |
segmentSizeFactor = 1; | |
bestPG = 3; | |
} else if (range.compareTo(BigInteger.valueOf(24_000_000L)) < 0) { | |
segmentSizeFactor = 16; | |
bestPG = 5; | |
} else if (range.compareTo(BigInteger.valueOf(1_100_000_000L)) < 0) { | |
segmentSizeFactor = 32; | |
bestPG = 7; | |
} else if (range.compareTo(BigInteger.valueOf(35_500_000_000L)) < 0) { | |
segmentSizeFactor = 64; | |
bestPG = 11; | |
} else if (range.compareTo(BigInteger.valueOf(15_000_000_000_000L)) < 0) { | |
bestPG = 13; | |
if (range.compareTo(BigInteger.valueOf(7_000_000_000_000L)) > 0) { | |
segmentSizeFactor = 384; | |
} else if (range.compareTo(BigInteger.valueOf(2_500_000_000_000L)) > 0) { | |
segmentSizeFactor = 320; | |
} else if (range.compareTo(BigInteger.valueOf(250_000_000_000L)) > 0) { | |
segmentSizeFactor = 196; | |
} else { | |
segmentSizeFactor = 128; | |
} | |
} else { | |
segmentSizeFactor = 384; | |
bestPG = 17; | |
} | |
genPGparameters(bestPG); | |
} | |
/** | |
* Compute the primes r1..sqrt(input_num) and store in global 'primes' array. | |
* Any algorithm (fast|small) is usable. Here the SoZ for P5 is used. | |
*/ | |
static void sozpg(BigInteger maxRange) { | |
// P5's modulus value | |
final int P5Modulus = 30; | |
// P5's residue count | |
final int P5ResidueCounter = 8; | |
// P5's residues list | |
final int[] res = {7, 11, 13, 17, 19, 23, 29, 31}; | |
final int[] posn = {0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 4, 0, 0, 0, 5, 0, 0, 0, 0, 0, 6, 0, 7}; | |
// integer sqrt of sqrt(input value) | |
final long sqrtN = Bsqrt(maxRange).longValue(); | |
// number of resgroups to input value | |
final int kmax = maxRange.subtract(BigInteger.valueOf(7L)) | |
.divide(BigInteger.valueOf(P5Modulus)) | |
.add(ONE).intValue(); | |
short[] prms = new short[kmax]; // byte array of prime candidates | |
int modk = 0; | |
int r = -1; | |
int k = 0; // init residue parameters | |
// mark the multiples of the primes r1..sqrtN in 'prms' | |
while (true) { | |
r++; | |
if (r == P5ResidueCounter) { | |
r = 0; | |
modk += P5Modulus; | |
k++; | |
} | |
if ((prms[k] & (1 << r) & 0XFF) != 0) continue; // skip pc if not prime | |
final int prm_r = res[r]; // if prime save its residue value | |
final int prime = modk + prm_r; // numerate the prime value | |
if (prime > sqrtN) break; // we're finished when it's > sqrtN | |
for (int ri : res) { // mark prime's multiples in prms | |
final int prod = prm_r * ri - 2; // compute cross-product for prm_r|ri pair | |
final int bit_r = (1 << posn[mod(prod, P5Modulus)]) & 0XFF; // bit mask for prod's residue | |
int kpm = k * (prime + ri) + prod / P5Modulus; // 1st resgroup for prime mult | |
while (kpm < kmax) { | |
prms[kpm] |= bit_r; | |
kpm += prime; | |
} | |
} | |
} | |
// prms now contains the nonprime positions for the prime candidates r1..N | |
// extract primes into global var 'primes' | |
primes = new ArrayDeque<>(); // create empty dynamic array for primes | |
IntStream.range(0, kmax).forEach(km -> { // for each resgroup | |
int[] ri = {0}; | |
for (int res_r : res) { // extract the primes in numerical order | |
if ((prms[km] & (1 << ri[0]++) & 0XFF) == 0) { | |
primes.add(P5Modulus * km + res_r); | |
} | |
} | |
}); | |
while (primes.getFirst() < firstResidue) primes.pollFirst(); | |
while (primes.getLast() > maxRange.longValue()) primes.pollLast(); | |
} | |
static class twinSieve extends Kernel { | |
//TODO Must be outside | |
final int[] lastwins; | |
final int[] cnts; | |
@Constant final int[] twinsResidues; | |
@Constant final int[] residuesInverse; | |
@Constant final int[] primesArray; | |
int PGmodulus = 0; | |
int Lend = 0; | |
int Lstart = 0; | |
int firstResidueIndex = 0; //PGparam.Lkmin - 1; | |
int lastResidueIndex = 0; //PGparam.Lkmax; | |
int segSize = 0; | |
int segSlice = 0; | |
int flagCreateNextTwinPrime = 0; | |
@Local final int[] nextp; | |
@Local final int[] seg; | |
twinSieve(int[] twinsResidues, int[] primesArray) { | |
segSize = SSOZJCL2.segmentSize; | |
segSlice = segSize; | |
Lstart = PGparam.Lstart; | |
Lend = PGparam.Lend; | |
firstResidueIndex = PGparam.Lkmin - 1; | |
lastResidueIndex = PGparam.Lkmax; | |
PGmodulus = PGparam.modulus; | |
// Not working in constructor !? | |
this.twinsResidues = twinsResidues; //SSOZJCL2.restwins.stream().mapToLong(i->i).toArray(); | |
this.primesArray = primesArray; //SSOZJCL2.primes.stream().mapToLong(i->i).toArray(); | |
this.residuesInverse = resinvrs; | |
// This must be final global and not here ! | |
// array to hold count of tps for each thread | |
// TODO must be outside | |
cnts = new int[restwins.size()]; | |
// array to hold largest tp for each thread | |
lastwins = new int[restwins.size()]; | |
seg = new int[(int) ((segSize - 1 >>> S) + 1)]; | |
nextp = new int[primes.size()]; | |
} | |
@Override | |
public void run() { | |
int thread = getGlobalId(); | |
int upperTwinResidue = twinsResidues[thread]; | |
int lowerTwinResidue = upperTwinResidue - 2; | |
int sum = 0; | |
// ensure high and low tps in range | |
if (((lastResidueIndex - 1) * PGmodulus + upperTwinResidue) > Lend) lastResidueIndex--; | |
if ((firstResidueIndex * PGmodulus + lowerTwinResidue) < Lstart) firstResidueIndex++; | |
// TODO Doesnt work in @run | |
// @Local int[] seg = new int[(int) ((KB - 1 >>> S) + 1)]; | |
// @Local int[] nextp = new int[AprimesArray.length]; | |
int highestTwinPair = 0; // max tp|resgroup, Kmax for slice | |
flagCreateNextTwinPrime = firstResidueIndex; | |
while (firstResidueIndex < lastResidueIndex) { | |
// set last slice resgroup size | |
if ((lastResidueIndex - firstResidueIndex) < segSize) segSlice = lastResidueIndex - firstResidueIndex; | |
for (int i = 0; i < primesArray.length; i++) { | |
//NextpInitPart | |
//This is done I time for each thread, for each prime | |
//Cant avoid it, since openCl doesnt Allow to call external method | |
if (flagCreateNextTwinPrime == firstResidueIndex) { | |
// find the resgroup it's in, and its residue value, and its residue inverse | |
int residueGroupIndex = (primesArray[i] - 2) / PGmodulus; | |
int residue = floorMod((primesArray[i] - 2), PGmodulus) + 2; | |
int residueInverse = residuesInverse[residue]; | |
// compute the lowResidue for r for lo_tp | |
int lowResidue = floorMod((lowerTwinResidue * residueInverse - 2), PGmodulus) + 2; | |
int indexOfLowResidue = (residueGroupIndex * (primesArray[i] + lowResidue) + (residue * lowResidue - 2) / PGmodulus); | |
if (indexOfLowResidue < firstResidueIndex) { // if 1st mult index < start_num's | |
indexOfLowResidue = floorMod((firstResidueIndex - indexOfLowResidue), primesArray[i]); // how many indices short is it | |
if (indexOfLowResidue > 0) indexOfLowResidue = primesArray[i] - indexOfLowResidue; // adjust index value into range | |
} else indexOfLowResidue -= firstResidueIndex; // else here, adjust index if it was > | |
nextp[i << 1] = indexOfLowResidue; // lo_tp index val >= start of range | |
// compute the highResidue for r for hi_tp | |
int highResidue = floorMod((upperTwinResidue * residueInverse - 2), PGmodulus) + 2; | |
int indexOfHighResidue = residueGroupIndex * (primesArray[i] + highResidue) + (residue * highResidue - 2) / PGmodulus; | |
if (indexOfHighResidue < firstResidueIndex) { // if 1st mult index < start_num's | |
indexOfHighResidue = floorMod((firstResidueIndex - indexOfHighResidue), primesArray[i]); // how many indices short is it | |
if (indexOfHighResidue > 0) indexOfHighResidue = primesArray[i] - indexOfHighResidue; // adjust index value into range | |
} else indexOfHighResidue -= firstResidueIndex; // else here, adjust index if it was > | |
nextp[i << 1 | 1] = indexOfHighResidue; // hi_tp index val >= start of range | |
} // End NextpInit | |
// for lower twin pair residue track | |
int indexOfLowResidue = nextp[i << 1]; // starting from this resgroup in seg | |
for (; indexOfLowResidue < segSlice; indexOfLowResidue += primesArray[i]) | |
seg[(indexOfLowResidue >>> S)] |= (1 << (indexOfLowResidue & BMASK)); // mark primenth resgroup bits prime mults and set next prime multiple resgroup | |
nextp[i << 1] = (indexOfLowResidue - segSlice); // save 1st resgroup in next eligible seg | |
// for upper twin pair residue track | |
int indexOfHighResidue = nextp[i << 1 | 1]; // starting from this resgroup in seg | |
for (; indexOfHighResidue < segSlice; indexOfHighResidue += primesArray[i]) // mark primenth resgroup bits prime mults | |
seg[(indexOfHighResidue >>> S)] |= (1 << (indexOfHighResidue & BMASK)); // set next prime multiple resgroup | |
nextp[i << 1 | 1] = (indexOfHighResidue - segSlice); // save 1st resgroup in next eligible seg | |
} | |
int upperSegmentIndex = (segSlice - 1); | |
seg[upperSegmentIndex >>> S] |= ((-(2 << (upperSegmentIndex & BMASK)))); | |
// init seg twin primes count then find seg sum | |
int cnt = 0; | |
for (int m = 0; m <= (upperSegmentIndex >>> S); m++) cnt += (1 << S) - bitCount(seg[m]); | |
// if segment has twin primes, add the segment count to total count | |
if (cnt > 0) { | |
sum += cnt; | |
// from end of seg, count backwards to largest tp | |
while ((seg[(upperSegmentIndex >>> S)] & (1 << (upperSegmentIndex & BMASK))) != 0) upperSegmentIndex--; | |
// numerate its full resgroup value | |
highestTwinPair = firstResidueIndex + upperSegmentIndex; | |
} | |
// set 1st resgroup val of next seg slice | |
firstResidueIndex += segSize; | |
if (firstResidueIndex < lastResidueIndex) | |
// set seg to all primes | |
for (int b = 0; b <= upperSegmentIndex >>> S; b++) seg[b] = 0; | |
} | |
// when sieve done for full range, numerate largest twin prime in segs | |
highestTwinPair = upperTwinResidue > Lend ? 0 : highestTwinPair * PGmodulus + upperTwinResidue; | |
lastwins[thread] = (sum == 0 ? 1 : highestTwinPair);// store final seg tp value | |
cnts[thread] = sum; // sum for twin pair | |
} | |
private int floorMod(int x, int y) { | |
// return Math.floorMod(x,y); | |
return ((x >> 31) & (y - 1)) + (x % y); | |
} | |
private int bitCount(int i) { | |
//return Integer.bitCount(i); | |
i = i - ((i >>> 1) & 0x55555555); | |
i = (i & 0x33333333) + ((i >>> 2) & 0x33333333); | |
i = (i + (i >>> 4)) & 0x0f0f0f0f; | |
i = i + (i >>> 8); | |
i = i + (i >>> 16); | |
// i = i + (i >>> 32); | |
return i & 0x3f; | |
} | |
} | |
/** | |
* Main routine to setup, time, and display results for twin primes sieve. | |
*/ | |
static void twinprimes_ssoz() { | |
System.out.println(" Max threads = " + (countProcessors())); | |
long ts = epochTime(); | |
// select PrimeGenerator and seg factor Bn for input range | |
setSieveParameters(start_num, end_num); | |
final int pairscnt = restwins.size(); // number of twin pairs for selected PG | |
final int[] cnts = new int[restwins.size()]; // array to hold count of tps for each thread | |
final int[] lastwins = new int[restwins.size()]; // array to hold largest tp for each thread | |
// generate sieving primes | |
if (PGparam.Lend < 49L) primes.add((5)); | |
else sozpg(Bsqrt(end_num)); // <= sqrt(end_num) | |
PGparam.primesSize = primes.size(); | |
System.out.println("each " + pairscnt + " threads has nextp[" + 2 + " x " + PGparam.primesSize + "] array"); | |
int twinscnt = 0; // number of twin primes in range | |
final int lo_range = restwins.getFirst() - 3; // lo_range = lo_tp - 1 | |
// excluded low tp values for PGs used | |
for (int tp : new int[]{3, 5, 11, 17}) { | |
if (end_num.equals(THREE)) break; // if 3 end of range, no twin primes | |
if (tp >= PGparam.Lstart && tp <= lo_range) twinscnt++; | |
} | |
long te = epochTime() - ts; | |
System.out.println("setup time = " + te / 1e3 + " secs"); | |
System.out.println("perform twinprimes ssoz sieve"); | |
final long t1 = epochTime(); // start timing ssoz sieve execution | |
//This doesnt work in constructor !? | |
final int[] arestwins = restwins.stream().mapToInt(i -> i).toArray(); | |
final int[] aprimesArray = primes.stream().mapToInt(i -> i).toArray(); | |
twinSieve sieve = new twinSieve(arestwins, aprimesArray); | |
sieve.setExplicit(true); | |
Range twinRange = Range.create(pairscnt); | |
sieve.put(cnts).put(lastwins); | |
sieve.execute(twinRange); | |
sieve.get(cnts).get(lastwins); | |
// find largest twin prime in range | |
int last_twin = 0; | |
System.out.println("cnts Array "+Arrays.toString(cnts)); | |
System.out.println("lastwins Array "+ Arrays.toString(lastwins)); | |
twinscnt += Arrays.stream(cnts).sum(); | |
last_twin = Arrays.stream(lastwins).max().orElse(0); | |
if (PGparam.Lend == 5L && twinscnt == 1) last_twin = 5; | |
// set number of resgroups in last slice | |
int Kn = PGparam.Lrange % segmentSize; | |
// if multiple of seg size set to seg size | |
if (Kn == 0) Kn = segmentSize; | |
long t2 = epochTime() - t1; | |
System.out.println("sieve time = " + t2 / 1e3 + " secs"); | |
System.out.println("last segment = " + Kn + " resgroups; segment slices = " + ((PGparam.Lrange - 1) / segmentSize + 1)); | |
System.out.println("total twins = " + twinscnt + "; last twin = " + (last_twin - 1) + "+/-1"); | |
System.out.println("total time = " + (t2 + te) / 1e3 + " secs\n"); | |
System.out.println(KernelManager.instance().bestDevice()); | |
System.out.println("\nRange " + twinRange.toString()); | |
List<ProfileInfo> profileInfo = sieve.getProfileInfo(); | |
if (profileInfo != null && profileInfo.size() > 0) { | |
for (ProfileInfo p : profileInfo) { | |
System.out.print(p.getType() + " " + p.getLabel() | |
+ " " + p.getStart() / 1000 + " .. " + p.getEnd() / 1000 | |
+ " " + (p.getEnd() - p.getStart()) / 1000 + "us" | |
+ " " + p.getSubmit() + " " + p.getQueued() ); | |
} | |
System.out.println(); | |
} | |
sieve.dispose(); | |
} | |
public static void main(String[] args) { | |
boolean verbose = true; | |
if (verbose) { | |
// System.setProperty("com.aparapi.enableVerboseJNI", "true"); | |
// System.setProperty("com.aparapi.dumpFlags", "true"); | |
System.setProperty("com.aparapi.enableShowGeneratedOpenCL", "true"); | |
// System.setProperty("com.aparapi.enableVerboseJNIOpenCLResourceTracking", "true"); | |
System.setProperty("com.aparapi.enableExecutionModeReporting", "true"); | |
System.setProperty("com.aparapi.logLevel", "WARNING"); //{OFF|FINEST|FINER|FINE|WARNING|SEVERE|ALL} | |
System.setProperty("com.aparapi.enableProfiling", "true"); | |
System.setProperty("com.aparapi.executionMode", "GPU,CPU"); //{GPU|ACC|CPU|JTP|SEQ} | |
System.setProperty("com.aparapi.dumpProfilesOnExit", "true"); | |
} | |
boolean tweak = true; | |
if (tweak) { | |
System.setProperty("com.aparapi.enable.PUTFIELD", "true"); | |
// System.setProperty("com.aparapi.disable.ARETURN", "true"); | |
System.setProperty("com.aparapi.enable.PUTSTATIC", "true"); | |
System.setProperty("com.aparapi.enable.GETSTATIC", "true"); | |
// System.setProperty("com.aparapi.enable.INVOKEINTERFACE", "true"); | |
System.setProperty("com.aparapi.enable.MONITOR", "true"); | |
System.setProperty("com.aparapi.disable.ARRAY", "false"); | |
// System.setProperty("com.aparapi.enable.NEW", "true"); | |
// System.setProperty("com.aparapi.enable.ATHROW", "true"); | |
// System.setProperty("com.aparapi.disable.METHODARRAYPASSING", "true"); | |
System.setProperty("com.aparapi.enable.ARRAYLENGTH", "true"); | |
// System.setProperty("com.aparapi.enable.SWITCH", "true"); | |
//System.setProperty("com.aparapi.enableAtomic32", "true"); | |
//System.setProperty("com.aparapi.enableAtomic64", "true"); | |
System.setProperty("com.aparapi.enableByteWrites", "true"); | |
//System.setProperty("com.aparapi.enableDoubles", "true"); | |
} | |
//if (args==null) | |
Scanner userInput = new Scanner(System.in).useDelimiter("[,\\s+]"); | |
System.out.println("Please enter an range of integer (comma or space separated): "); | |
//Only BigDecimal understand scientific notation | |
BigInteger stop = userInput.nextBigDecimal().toBigIntegerExact(); | |
BigInteger start = userInput.hasNextLine() ? userInput.nextBigDecimal().toBigIntegerExact() : THREE; | |
userInput.close(); | |
if (stop.compareTo(start) < 0) { | |
BigInteger tmp = start; | |
start = stop; | |
stop = tmp; | |
} | |
start = start.max(THREE); | |
BigInteger end = stop.max(THREE); | |
start_num = start.or(ONE); // if start_num even add 1 | |
end_num = end.subtract(ONE).or(ONE); // if end_num even subtract 1 | |
PGparam.Lstart = start_num.intValueExact(); | |
PGparam.Lend = end_num.intValueExact(); | |
twinprimes_ssoz(); | |
} | |
private static int countProcessors() { | |
return Runtime.getRuntime().availableProcessors(); | |
} | |
/** | |
* Granularity tens of milliseconds. | |
*/ | |
private static long epochTime() { | |
return System.currentTimeMillis(); | |
} | |
public static BigInteger Bsqrt(BigInteger x) { | |
BigInteger div = BigInteger.ZERO.setBit(x.bitLength() >> 1); | |
BigInteger div2 = div; | |
// Loop until we hit the same value twice in a row, or wind up alternating. | |
for (; ; ) { | |
BigInteger y = div.add(x.divide(div)).shiftRight(1); | |
if (y.equals(div) || y.equals(div2)) return div.min(div2); | |
div2 = div; | |
div = y; | |
} | |
} | |
/** | |
* According to the Java Language Spec, Java's % operator is a remainder operator, not a modulo operator. | |
* | |
* @param x a long | |
* @param y a long | |
* @return a long modulo | |
*/ | |
private static int mod(int x, int y) { | |
// return Math.floorMod(x,y); | |
return ((x >> 31) & (y - 1)) + (x % y); | |
} | |
} |
Author
Pascal66
commented
Feb 2, 2020
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment