Last active
March 12, 2020 11:26
-
-
Save TotallyNotChase/52ad6c54ee7122fbd3f245135edadff6 to your computer and use it in GitHub Desktop.
Benchmark to sieve + string checks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include<stdio.h> | |
#include<stdlib.h> | |
#include<string.h> | |
#include<stdbool.h> | |
#include<stdint.h> | |
#include<inttypes.h> | |
#include<math.h> | |
#include<time.h> | |
typedef struct uint64_vector | |
{ | |
uint64_t* data; | |
uint64_t capacity, size; | |
} uint64vec_t; | |
typedef struct char_vector | |
{ | |
bool* data; | |
uint64_t size, count; | |
} boolvec_t; | |
uint64_t L1D_CACHE; | |
uint64vec_t create_uint64_vector(uint64_t size) | |
{ | |
uint64vec_t vector; | |
vector.data = malloc(size * sizeof(uint64_t)); | |
if (vector.data == NULL) | |
{ | |
printf("\nAn error occured while allocating memory for uint64_vector\n"); | |
exit(1); | |
} | |
vector.capacity = size; | |
vector.size = 0; | |
return vector; | |
} | |
boolvec_t create_bool_vector(uint64_t size) | |
{ | |
boolvec_t vector; | |
vector.data = malloc(size * sizeof(bool)); | |
if (vector.data == NULL) | |
{ | |
printf("\nAn error occured while allocating memory for bool_vector\n"); | |
exit(1); | |
} | |
memset(vector.data, true, sizeof(bool) * size); | |
vector.size = size; | |
vector.count = 0; | |
return vector; | |
} | |
void uint64_vector_append(uint64vec_t* vector, uint64_t data) | |
{ | |
if ((vector->size + 1) < vector->capacity) | |
{ | |
vector->data[vector->size++] = data; | |
return; | |
} | |
vector->data = realloc(vector->data, (vector->capacity *= 2) * sizeof(uint64_t)); | |
if (vector->data == NULL) | |
{ | |
printf("\nAn error occured while re-allocating memory for uint64_vector\n"); | |
exit(1); | |
} | |
} | |
int countDigits(uint64_t num) | |
{ | |
return snprintf(NULL, 0, "%" SCNu64, num) - (num < 0); | |
} | |
bool isNearRep(uint64_t num) | |
{ | |
int i, unqcharcount = 0, digitcount = countDigits(num); | |
char* str, commonchar; | |
str = malloc((digitcount + 1) * sizeof(char)); | |
snprintf(str, digitcount + 1, "%" SCNu64, num); // Converting the integer number to a string | |
if (digitcount < 3) | |
{ | |
// Near Rep numbers should be at least 3 digits | |
return false; | |
} | |
/** | |
* Find the common character in the first few digits | |
* i.e if we feed in 9999899, the common char is 9 | |
*/ | |
if (str[0] == str[1] || str[0] == str[2]) | |
{ | |
commonchar = str[0]; | |
} | |
else if (str[1] == str[2]) | |
{ | |
commonchar = str[1]; | |
} | |
else | |
{ | |
// if no common character is found i.e 968999 or 988999 | |
return false; | |
} | |
/** | |
* The above conditional statements will still find a common char | |
* even if the number is 99986574, this is why we loop through for | |
* additional checks | |
*/ | |
for (i = 0; i < strlen(str); i += 1) | |
{ | |
// Checks for multiple unique digits i.e digits that are not common digits | |
if (str[i] != commonchar) | |
{ | |
unqcharcount++; | |
} | |
if (unqcharcount > 1) | |
{ | |
break; | |
} | |
} | |
if (unqcharcount == 1) | |
{ | |
// Must have exactly one unique digit | |
return true; | |
} | |
else | |
{ | |
return false; | |
} | |
} | |
size_t approximate_size(uint64_t limit) | |
{ | |
int i; | |
float x = 1; | |
for (i = log(limit); i > 0; i--) | |
{ | |
x *= 2.5; | |
} | |
return (size_t) x; | |
} | |
void segmented_sieve(uint64_t limit) | |
{ | |
int64_t low, high, i = 3, j, k, n = 3, s = 3; | |
size_t i_size, approx_arr_size = approximate_size(limit); | |
uint64_t sqrtval = (uint64_t)sqrt(limit); | |
uint64_t segment_size = sqrtval < L1D_CACHE ? L1D_CACHE : sqrtval; | |
uint64vec_t prime_arr = create_uint64_vector(approx_arr_size); | |
uint64vec_t multiples = create_uint64_vector(approx_arr_size); | |
boolvec_t sieve = create_bool_vector(segment_size); | |
boolvec_t is_prime = create_bool_vector(sqrtval + 1); | |
for (low = 0; low <= limit; low += segment_size) | |
{ | |
memset(sieve.data, true, sizeof(bool) * sieve.size); | |
high = low + segment_size - 1; | |
high = high < limit ? high : limit; | |
for (; i * i <= high; i += 2) | |
{ | |
if (is_prime.data[i]) | |
{ | |
for (j = i * i; j <= sqrtval; j += i) | |
{ | |
is_prime.data[j] = false; | |
} | |
} | |
} | |
for (; s * s <= high; s += 2) | |
{ | |
if (is_prime.data[s]) | |
{ | |
uint64_vector_append(&prime_arr, s); | |
uint64_vector_append(&multiples, s * s - low); | |
} | |
} | |
for (i_size = 0; i_size < prime_arr.size; i_size++) | |
{ | |
j = multiples.data[i_size]; | |
for (k = prime_arr.data[i_size] * 2; j < segment_size; j += k) | |
{ | |
sieve.data[j] = false; | |
} | |
multiples.data[i_size] = j - segment_size; | |
} | |
for (; n <= high; n += 2) | |
{ | |
if (sieve.data[n - low] && isNearRep(n)) | |
{ | |
printf("%" SCNu64 ", ", n); | |
} | |
} | |
} | |
} | |
int main() | |
{ | |
uint64_t N; | |
double time_taken; | |
printf("Enter your CPUs L1D Cache per core (in bytes): "); | |
scanf_s("%" SCNu64, &L1D_CACHE); | |
printf("Enter upper limit: "); | |
scanf_s("%" SCNu64, &N); | |
clock_t begin = clock(); | |
printf("["); | |
segmented_sieve(N); | |
printf("\b\b]"); | |
clock_t end = clock(); | |
time_taken = (double)(end - begin) / CLOCKS_PER_SEC; | |
printf("\nDone! Execution time: %f\n", time_taken); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time, math | |
def is_near_rep(num): | |
# Check if the number is a near rep digit number | |
numstr = str(num) | |
if len(numstr) < 3: | |
# Near rep digit numbers must be at least 3 digits | |
return False | |
""" | |
Find the common character in the first few digits | |
i.e if we feed in 9999899, the common char is 9 | |
""" | |
if numstr[0] == numstr[1] or numstr[0] == numstr[2]: | |
commonchar = numstr[0] | |
elif numstr[1] == numstr[2]: | |
commonchar = numstr[1]; | |
else: | |
# If no common character is found | |
# i.e 968999 or 988999 | |
return False | |
""" | |
The above conditional statements will still find a common char | |
even if the number is 99986574, this is why we loop through for | |
additional checks | |
""" | |
unqcharcount = 0 | |
for i in numstr: | |
# Checks for multiple unique digits | |
# i.e digits that are not common digits | |
if i != commonchar: | |
unqcharcount += 1 | |
if unqcharcount > 1: | |
break | |
if unqcharcount == 1: | |
# Must have exactly one unique digit | |
return True | |
else: | |
return False | |
def segmented_sieve(limit): | |
i, n, s = 3, 3, 3 | |
sqrtval = int(math.sqrt(limit)) | |
segment_size = sqrtval if sqrtval > L1D_CACHE else L1D_CACHE | |
is_prime = [True for x in range(0, sqrtval + 1)] | |
prime_arr = [] | |
multiples = [] | |
found_primes = [] | |
for low in range(0, limit + 1, segment_size): | |
sieve = [True for x in range(0, segment_size)] | |
high = low + segment_size - 1 | |
high = high if high < limit else limit | |
while i * i <= high: | |
if is_prime[i]: | |
for j in range(i * i, sqrtval + 1, i): | |
is_prime[j] = False | |
i += 2 | |
while s * s <= high: | |
if is_prime[s]: | |
prime_arr.append(s) | |
multiples.append(s * s - low) | |
s += 2 | |
for i_size in range(0, len(prime_arr)): | |
k = prime_arr[i_size] * 2 | |
j = multiples[i_size] | |
while j < segment_size: | |
sieve[j] = False | |
j += k | |
multiples[i_size] = j - segment_size | |
while n <= high: | |
if sieve[n - low] and is_near_rep(n): | |
found_primes.append(n) | |
n += 2 | |
print(found_primes) | |
L1D_CACHE = int(input('Enter your CPU\'s L1D cache per core (in bytes): ')) | |
N = input('Enter an upper limit: ') | |
t0 = time.time() | |
segmented_sieve(int(N)) | |
t1 = time.time() | |
print('\nTime required:', t1 - t0) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment