Last active
September 29, 2015 22:48
-
-
Save maxdeliso/1680623 to your computer and use it in GitHub Desktop.
Sieve of Atkin
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Max DeLiso <[email protected]> | |
* | |
* Purpose: compute prime numbers using a Sieve of Atkin | |
* | |
* Runtime efficiency: O(N/log log N) | |
* | |
* Memory efficiency: N/8 bytes | |
* | |
* Input: Limiting value n, [1,n-1] will be computed and outputted. | |
* n is constrained by ULLONG_MAX, which has the value | |
* 18446744073709551615, or ~ 18.4 * 10^18, or eighteen quintillion. | |
* | |
* NOTE: Currently, the program attempts to allocate n/8 bytes of RAM | |
* off the heap, and if it fails the program aborts | |
* | |
* TODO: Figure out how to make the program work for values of n which | |
* exceed available RAM. (pipelining) | |
* | |
* TODO: Add optional concurrency via pthreads | |
*/ | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdbool.h> | |
#include <stdint.h> | |
#include <limits.h> | |
#include <inttypes.h> | |
#include <assert.h> | |
#include <math.h> | |
#include <string.h> | |
#include <time.h> | |
#include <errno.h> | |
#include <pthread.h> | |
/* Functions for bit manipulation */ | |
#define BITS_PER_BYTE 8 | |
#define BYTE(i) ((i)/BITS_PER_BYTE) | |
#define BIT(i) ((i)%BITS_PER_BYTE) | |
typedef uint64_t bigUint; | |
static const unsigned char BYTE_MASK[] = { 128, 64, 32, 16, 8, 4, 2, 1 }; | |
/* Prototypes */ | |
static bool isDebug() { | |
#ifdef NDEBUG | |
return false; | |
#else | |
return true; | |
#endif | |
} | |
static bigUint processArgs(int argc, char **argv); | |
static bool getByteBit(unsigned char c, bigUint n); | |
static void flipByteBit(unsigned char *cptr, unsigned short n); | |
static void setByteBit(unsigned char *cptr, unsigned short n); | |
static void clearByteBit(unsigned char *cptr, unsigned short n); | |
struct atkinSegmentParams { | |
bigUint x; | |
bigUint y; | |
bigUint limit; | |
unsigned char * bufferPtr; | |
pthread_mutex_t * bufferMutexPtr; | |
}; | |
void *atkinSegment(void* arg); | |
static clock_t atkin(bigUint limit, bigUint bufferSize, unsigned char *buffer); | |
static void printResults(bigUint limit, bigUint bufferSize, | |
const unsigned char *buffer, double time); | |
/* Entry point */ | |
int main(int argc, char **argv) | |
{ | |
bigUint limit, bufferSize; | |
unsigned char *buffer; | |
double time; | |
limit = processArgs(argc, argv); | |
if(isDebug()) { | |
printf("info: read in limit as %" PRIu64 "\n", limit); | |
} | |
bufferSize = limit / BITS_PER_BYTE; | |
if (limit % BITS_PER_BYTE != 0) { | |
++bufferSize; | |
} | |
buffer = malloc(bufferSize); | |
if (buffer == NULL) { | |
fprintf(stderr, | |
"Failed to allocate memory for a limit of %" PRIu64 ", (%" | |
PRIu64 " bytes)\n", limit, bufferSize); | |
exit(1); | |
} | |
memset(buffer, 0, bufferSize); | |
time = atkin(limit, bufferSize, buffer); | |
printResults(limit, bufferSize, buffer, time / CLOCKS_PER_SEC); | |
if(isDebug()) { | |
printf("info: freeing heap allocated buffer @ %p\n", buffer); | |
} | |
free(buffer); | |
return 0; | |
} | |
/* Function definitions */ | |
bigUint processArgs(int argc, char **argv) | |
{ | |
bigUint limit; | |
if (argc != 2) { | |
printf("usage: %s limit\n", argv[0]); | |
exit(1); | |
} else { | |
limit = strtoull(argv[1], NULL, 0); | |
if (limit <= 5 || (limit == ULLONG_MAX && errno == ERANGE)) { | |
fprintf(stderr, "fatal: limit out of range\n"); | |
exit(1); | |
} | |
return limit; | |
} | |
} | |
inline bool getByteBit(unsigned char c, bigUint n) | |
{ | |
assert(n < 8); | |
if(isDebug()) { | |
printf("getting bit %" PRIu64 " from byte %x\n", n, c); | |
} | |
return c & BYTE_MASK[n]; | |
} | |
inline void flipByteBit(unsigned char *cptr, unsigned short n) | |
{ | |
assert(n < 8); | |
if(isDebug()) { | |
printf("flipping bit %u on block @ %p\n", n, cptr); | |
} | |
*cptr ^= BYTE_MASK[n]; | |
} | |
inline void setByteBit(unsigned char *cptr, unsigned short n) | |
{ | |
assert(n < 8); | |
if(isDebug()) { | |
printf("setting bit %u on block @ %p\n", n, cptr); | |
} | |
*cptr |= BYTE_MASK[n]; | |
} | |
inline void clearByteBit(unsigned char *cptr, unsigned short n) | |
{ | |
assert(n < 8); | |
if(isDebug()) { | |
printf("clearing bit %u on block @ %p\n", n, cptr); | |
} | |
*cptr &= ~BYTE_MASK[n]; | |
} | |
void* atkinSegment(void* arg) { | |
struct atkinSegmentParams * params = (struct atkinSegmentParams*) arg; | |
bigUint n; | |
bigUint x = params -> x; | |
bigUint y = params -> y; | |
bigUint limit = params -> limit; | |
unsigned char * buffer = params -> bufferPtr; | |
pthread_mutex_t * bufferMutexPtr = params -> bufferMutexPtr; | |
/* NOTE: this threading strategy is naive, the entire buffer is locked on each access. | |
* the contention actually makes this implementation substantially slower than an equivalent one | |
* that does not use threads */ | |
n = 4 * x * x + y * y; | |
if (n <= limit && ((n % 12 == 1) || (n % 12 == 5))) { | |
pthread_mutex_lock(bufferMutexPtr); | |
flipByteBit(buffer + BYTE(n), BIT(n)); | |
pthread_mutex_unlock(bufferMutexPtr); | |
} | |
n = 3 * x * x + y * y; | |
if (n <= limit && (n % 12 == 7)) { | |
pthread_mutex_lock(bufferMutexPtr); | |
flipByteBit(buffer + BYTE(n), BIT(n)); | |
pthread_mutex_unlock(bufferMutexPtr); | |
} | |
n = 3 * x * x - y * y; | |
if (x > y && (n <= limit) && (n % 12 == 11)) { | |
pthread_mutex_lock(bufferMutexPtr); | |
flipByteBit(buffer + BYTE(n), BIT(n)); | |
pthread_mutex_unlock(bufferMutexPtr); | |
} | |
return NULL; | |
} | |
clock_t atkin(bigUint limit, bigUint bufferSize, unsigned char *buffer) | |
{ | |
bigUint x, y, n, limitSqrt; | |
clock_t startClock, endClock; | |
double limitSqrtD; | |
pthread_mutex_t bufferMutex; | |
if(pthread_mutex_init(&bufferMutex, NULL) != 0) { | |
fprintf(stderr, "couldn't initialize a mutex. errno = %08x\n", errno); | |
exit(1); | |
} | |
(void) bufferSize; /* to quiet warnings during release build */ | |
startClock = clock(); | |
limitSqrtD = floor(sqrt(limit)); | |
assert(limitSqrtD <= ULLONG_MAX); | |
limitSqrt = (bigUint) limitSqrtD; | |
setByteBit(buffer + BYTE(2), BIT(2)); | |
setByteBit(buffer + BYTE(3), BIT(3)); | |
for (x = 1; x <= limitSqrt; ++x) { | |
/* allocate param structs on stack and parallel array of worker thread handles */ | |
struct atkinSegmentParams workerParams [limitSqrt]; | |
pthread_t workerThreads[limitSqrt]; | |
struct atkinSegmentParams thParamTemplate = { | |
.x = x, | |
.y = 0, /* set for reach y in loop below */ | |
.limit = limit, | |
.bufferPtr = buffer, | |
.bufferMutexPtr = &bufferMutex | |
}; | |
/* spawn the workers */ | |
for (y = 1; y <= limitSqrt; ++y) { | |
thParamTemplate.y = y; | |
workerParams[y-1] = thParamTemplate; | |
pthread_create(&workerThreads[y-1], NULL, atkinSegment, &workerParams[y-1]); /* TODO: ret */ | |
} | |
/* join the workers */ | |
for (y = 1; y <= limitSqrt; ++y) { | |
pthread_join(workerThreads[y-1], NULL); | |
} | |
} | |
for (n = 5; n <= limitSqrt; ++n) { | |
assert(BYTE(n) < bufferSize); | |
if (getByteBit(buffer[BYTE(n)], BIT(n))) { | |
y = 1; | |
x = n * n; | |
/* TODO: parallelize these operations by splitting the primes array | |
* into four equal parts, and then iterating down those parts with | |
* one thread each */ | |
while (y * x < limit) { | |
assert(BYTE(y * x) < bufferSize); | |
clearByteBit(buffer + BYTE(y * x), BIT(y * x)); | |
++y; | |
} | |
} | |
} | |
endClock = clock(); | |
return endClock - startClock; | |
} | |
void printResults(bigUint limit, bigUint bufferSize, | |
const unsigned char *buffer, double time) | |
{ | |
bigUint i, j, lastByte, lastBit; | |
bool more; | |
lastByte = BYTE(limit); | |
lastBit = BIT(limit); | |
fprintf(stderr, "%lf\n", time); | |
more = true; | |
for (i = 0; more && i < bufferSize; ++i) { | |
for (j = 0; more && j < BITS_PER_BYTE; ++j) { | |
if (i == lastByte && (j + 1) == lastBit) { | |
more = false; | |
} else if (getByteBit(buffer[i], j)) { | |
printf("%" PRIu64 "\n", i * BITS_PER_BYTE + j); | |
} | |
} | |
} | |
printf("\n"); | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Max DeLiso <[email protected]> | |
# TODO: add various targets for differing levels of sse/avx support | |
CC=gcc | |
STRIP=strip | |
CFLAGS=-Wall -Wextra -Werror -pedantic -std=c99 | |
OPTFLAGS=-O3 -mtune=native | |
DBGFLAGS=-O0 -g | |
LDFLAGS=-lm -lpthread | |
OUT=atkin | |
.PHONY: clean help | |
atkin-release: atkin.c | |
gcc $(CFLAGS) $(LDFLAGS) $(OPTFLAGS) -DNDEBUG -o $@ atkin.c | |
$(STRIP) $@ | |
atkin-static.o: atkin.c | |
gcc $(CFLAGS) $(OPTFLAGS) -DNDEBUG atkin.c -c -o $@ | |
atkin-static: atkin-static.o | |
gcc atkin-static.o -static -o $@ $(LDFLAGS) | |
$(STRIP) $@ | |
atkin-gen-pgo: atkin.c | |
gcc $(CFLAGS) $(LDFLAGS) $(OPTFLAGS) -DNDEBUG -fprofile-generate -o $@ atkin.c | |
atkin-use-pgo: atkin.c | |
gcc $(CFLAGS) $(LDFLAGS) $(OPTFLAGS) -DNDEBUG -fprofile-use -o $@ atkin.c | |
atkin-gen-gcov: atkin.c | |
gcc $(CFLAGS) $(LDFLAGS) -DNDEBUG -fprofile-arcs -ftest-coverage -o $@ atkin.c | |
atkin-debug: atkin.c | |
gcc $(CFLAGS) $(LDFLAGS) $(DBGFLAGS) atkin.c -o $@ | |
check: atkin-gen-gcov | |
@echo test | |
clean: | |
rm -f atkin atkin.asm atkin-* *.{gcno,gcda,gcov} *~ | |
help: | |
@echo atkin-release atkin-static atkin-gen-pgo atkin-use-pgo atkin-gen-gcov atkin-debug check |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment