Last active
November 12, 2015 01:07
-
-
Save alarrosa14/29441e55c53e42b562cc to your computer and use it in GitHub Desktop.
A Kmeans implementation using posix threads. Generates a Octave script to plot the output.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <pthread.h> | |
#include <stdlib.h> | |
#include <math.h> | |
#include <limits.h> | |
#include <float.h> | |
#include <stdio.h> | |
#include <sys/types.h> | |
#define CANT_THREADS 8 | |
#define N 300000 | |
#define K 100 | |
#define TOLERANCE 0.00001 | |
#define SEED 11 | |
typedef struct { | |
double x; | |
double y; | |
} Point; | |
Point* data; | |
Point* means; | |
int* groupForPoint; | |
double error; | |
int global_p, global_k; | |
pthread_mutex_t mutex_iterators, mutex_error; | |
pthread_barrier_t barrier; | |
void generateOctaveScript(int step) { | |
int i, j, first; | |
printf("# STEP %d\n", step); | |
printf("function step%d()\n", step); | |
for (i = 0; i < K; i++) { | |
first = 0; | |
printf("means(%d, :) = [%f %f];\n", i+1, means[i].x, means[i].y); | |
for (j = 0; j < N; j++) { | |
if (groupForPoint[j] == i) { | |
if (first == 0){ | |
first++; | |
printf("data%d = [", i+1); | |
} | |
printf("[%f %f] ; ", data[j].x, data[j].y); | |
} | |
} | |
printf("];\n"); | |
} | |
printf("plot("); | |
for (i = 1; i <= K; i++) { | |
printf("means(%d, 1), means(%d, 2), '.', data%d(:,1), data%d(:,2), '+'", i, i, i, i); | |
if (i < K) | |
printf(","); | |
} | |
printf(");\n"); | |
printf("endfunction\n"); | |
} | |
double distance(Point p1, Point p2) { | |
Point diff; | |
diff.x = p1.x - p2.x; | |
diff.y = p1.y - p2.y; | |
return (double) sqrt(diff.x*diff.x + diff.y*diff.y); | |
} | |
void* threadedFunction(void* args) { | |
int k; | |
int closerGroup; | |
double dist, minDist; | |
Point pAdd; | |
int n, p; | |
pthread_mutex_lock(&mutex_iterators); | |
while (global_p < N) { | |
p = global_p; | |
global_p = global_p + 1; | |
pthread_mutex_unlock(&mutex_iterators); | |
minDist = DBL_MAX; | |
closerGroup = 0; | |
for (k = 0; k < K; k++) { | |
dist = distance(data[p], means[k]); | |
if (dist < minDist) { | |
minDist = dist; | |
closerGroup = k; | |
} | |
} | |
pthread_mutex_lock(&mutex_error); | |
error += minDist; | |
pthread_mutex_unlock(&mutex_error); | |
groupForPoint[p] = closerGroup; | |
pthread_mutex_lock(&mutex_iterators); | |
} | |
pthread_mutex_unlock(&mutex_iterators); | |
pthread_barrier_wait(&barrier); | |
pthread_mutex_lock(&mutex_iterators); | |
while (global_k < K) { | |
k = global_k++; | |
pthread_mutex_unlock(&mutex_iterators); | |
pAdd.x = 0; pAdd.y = 0; | |
n = 0; | |
for (p = 0; p < N; p++) { | |
if (groupForPoint[p] == k) { | |
pAdd.x += data[p].x; | |
pAdd.y += data[p].y; | |
n++; | |
} | |
} | |
if (n > 0) { | |
means[k].x = pAdd.x / n; | |
means[k].y = pAdd.y / n; | |
} | |
pthread_mutex_lock(&mutex_iterators); | |
} | |
pthread_mutex_unlock(&mutex_iterators); | |
pthread_barrier_wait(&barrier); | |
pthread_exit(0); | |
} | |
int kmeans() { | |
pthread_t kmeans_t[CANT_THREADS]; | |
pthread_mutex_init(&mutex_iterators, NULL); | |
pthread_mutex_init(&mutex_error, NULL); | |
pthread_barrier_init(&barrier, NULL, CANT_THREADS); | |
int i; | |
double prevError; | |
error = DBL_MAX; | |
groupForPoint = (int*) malloc(N*sizeof(int)); | |
int iter; | |
iter= 0; | |
do { | |
//printf("Step %d\n", iter); | |
prevError = error; | |
error = 0; | |
global_p = 0; | |
global_k = 0; | |
for (i=0; i<CANT_THREADS; i++) { | |
int err = pthread_create(&kmeans_t[i], NULL, threadedFunction, NULL); | |
if (err != 0) | |
printf("Error creando el thread %d\n", i); | |
} | |
pthread_join(kmeans_t[0], NULL); | |
iter++; | |
} while (fabs(error - prevError) > TOLERANCE); | |
pthread_mutex_destroy(&mutex_iterators); | |
pthread_mutex_destroy(&mutex_error); | |
pthread_barrier_destroy(&barrier); | |
return iter; | |
} | |
int main(void) { | |
int i; | |
data = (Point*)malloc(N*sizeof(Point)); | |
means = (Point*)malloc(K*sizeof(Point)); | |
srand(SEED); | |
/* Cargamos puntos */ | |
for (i = 0; i < N; i++) { | |
data[i].x = (double)rand()/RAND_MAX; | |
data[i].y = (double)rand()/RAND_MAX; | |
} | |
/* Inicializamos las medias */ | |
for (i = 0; i < K; i++) { | |
means[i].x = (double)rand()/RAND_MAX; | |
means[i].y = (double)rand()/RAND_MAX; | |
} | |
int iterations = kmeans(); | |
generateOctaveScript(iterations); | |
free(groupForPoint); | |
free(data); | |
free(means); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment