Last active
February 5, 2018 00:57
-
-
Save danielealbano/6b02e514a6f8f5f8601f99045627efa1 to your computer and use it in GitHub Desktop.
HLE example. Compile with: gcc72 -Wall -pthread -O3 -mhle -g -o hle main2.c. Define LOCK_MUTEX, LOCK_ATOMIC or LOCK_HLE to choose which lock implementation to measure (LOCK_HLE is used when none is defined). Define VERBOSE for verbose output, or CSV for CSV-formatted output.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <pthread.h> | |
#include <stdatomic.h> | |
#include <immintrin.h> | |
#include <unistd.h> | |
#include <time.h> | |
/* Number of nanoseconds in one second. No trailing semicolon, so the macro
 * can be used anywhere inside an expression. */
#define BILLION 1000000000L
/* Select the default lock implementation: when the build defines none of
 * LOCK_MUTEX, LOCK_ATOMIC or LOCK_HLE, fall back to LOCK_HLE. */
#if !defined(LOCK_MUTEX) && !defined(LOCK_ATOMIC) && !defined(LOCK_HLE)
#define LOCK_HLE
#endif

/* Human-readable name of the lock implementation, used in the report.
 * The priority (MUTEX, then HLE, then ATOMIC) mirrors the order in which the
 * rest of the file tests these macros, so the label always names the lock
 * that is actually compiled in. defined() is used so that macros defined
 * with an empty value (e.g. "#define LOCK_ATOMIC") are handled too. */
#if defined(LOCK_MUTEX)
#define LOCK_TYPE "MUTEX"
#elif defined(LOCK_HLE)
#define LOCK_TYPE "HLE"
#else
#define LOCK_TYPE "ATOMIC"
#endif
/* One lock-protected slot of the benchmark.
 * The aligned(64) attribute gives the struct 64-byte alignment, which also
 * rounds its size up to a multiple of 64 bytes. */
struct test_element {
#if defined(LOCK_MUTEX)
    /* pthread mutex guarding `counter` in the LOCK_MUTEX build. */
    pthread_mutex_t lock;
#else
    /* Spinlock word for the atomic/HLE builds: written as 1 by lock_acquire
     * and as 0 by lock_release. */
    u_int32_t lock;
#endif
    /* Incremented once per visit by each worker while `lock` is held. */
    int counter;
} __attribute__((aligned(64)));
/* Benchmark description shared by every worker thread. */
struct test {
    /* Number of entries in `elements`. */
    size_t size;
    /* How many full passes over `elements` each worker performs. */
    u_int32_t config_loops;
    /* Array of `size` lock-protected slots. */
    struct test_element *elements;
} __attribute__((aligned(64)));
/* Per-worker state: inputs filled in by main() before the thread starts and
 * results written back by the worker. */
struct thread_data {
    /* Worker index used in messages; 8 bits wide, so values above 255 wrap. */
    u_int8_t tid;
    /* Count of failed lock acquisition attempts, bumped by lock_acquire. */
    u_int64_t aborts;
    /* Handle filled in by pthread_create. */
    pthread_t thread;
    /* CPU affinity mask requested for (and then read back from) the worker. */
    cpu_set_t cpuset;
    /* Shared benchmark description. */
    struct test *test;
    /* Elapsed time measured by the worker around its loop. */
    u_int64_t duration;
};
#ifdef LOCK_MUTEX
/* Acquire `lock` by blocking on the pthread mutex.
 * `aborts` is accepted only so both variants share one call shape; the mutex
 * variant never touches it. */
void lock_acquire(u_int64_t *aborts, pthread_mutex_t *lock)
{
    (void)aborts;
    pthread_mutex_lock(lock);
}
#else
/* Memory order used for the acquiring exchange. It is kept as a macro so the
 * builtin receives a literal constant expression. Plain #else (rather than
 * "#elif LOCK_ATOMIC") guarantees an order is always supplied, even when
 * LOCK_ATOMIC is defined with an empty value. */
#ifdef LOCK_HLE
#define LOCK_ACQUIRE_ORDER (__ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)
#else
#define LOCK_ACQUIRE_ORDER (__ATOMIC_ACQUIRE)
#endif

/* Acquire the spinlock word `*lock` by atomically exchanging in 1 until the
 * previous value read back is 0.
 * Every failed attempt increments `*aborts` and executes a pause instruction
 * before retrying. */
void lock_acquire(u_int64_t *aborts, u_int32_t *lock)
{
    while (__atomic_exchange_n(lock, 1, LOCK_ACQUIRE_ORDER) != 0) {
        (*aborts)++;
        _mm_pause();
    }
}
#endif
#ifdef LOCK_MUTEX
/* Release a lock taken with lock_acquire by unlocking the pthread mutex. */
void lock_release(pthread_mutex_t *lock)
{
    pthread_mutex_unlock(lock);
}
#else
/* Release the spinlock word by atomically storing 0 with release ordering
 * (plus the HLE release hint in the LOCK_HLE build).
 * __atomic_store_n takes the value directly, so no temporary of a different
 * type than `*lock` is needed. */
void lock_release(u_int32_t *lock)
{
#ifdef LOCK_HLE
    __atomic_store_n(lock, 0, __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE);
#else
    __atomic_store_n(lock, 0, __ATOMIC_RELEASE);
#endif
}
#endif
void* t_func(void* userdata) | |
{ | |
struct timespec begin, end; | |
struct thread_data* thread_data = userdata; | |
struct test* test = thread_data->test; | |
pthread_t thread = pthread_self(); | |
if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &thread_data->cpuset) != 0) { | |
printf("Failed to set cpu affinity for thread %d\n", thread_data->tid); | |
} | |
pthread_getaffinity_np(thread, sizeof(cpu_set_t), &thread_data->cpuset); | |
clock_gettime( CLOCK_REALTIME, &begin); | |
int i = 0; | |
for(i = 0; i < test->config_loops; i++) { | |
int element_index; | |
for(element_index = 0; element_index < test->size; element_index++) { | |
struct test_element* test_element = &test->elements[element_index]; | |
{ | |
lock_acquire(&thread_data->aborts, &test_element->lock); | |
test_element->counter++; | |
lock_release(&test_element->lock); | |
} | |
} | |
} | |
clock_gettime( CLOCK_REALTIME, &end); | |
thread_data->duration = (end.tv_sec - begin.tv_sec) + (end.tv_nsec - begin.tv_nsec) / BILLION; | |
return NULL; | |
} | |
int main(int argc, char** argv) { | |
int i; | |
u_int32_t ncpu, config_threads, config_slots, config_loops; | |
u_int64_t total_loops; | |
u_int64_t total_retries; | |
struct test* test; | |
struct thread_data* threads_data, *thread_data; | |
clock_t total_duration; | |
if (argc != 4) { | |
printf("Invalid arguments!\n"); | |
printf("%s NUM_THREADS NUM_SLOTS NUM_LOOPS!\n", argv[0]); | |
return 1; | |
} | |
ncpu = sysconf(_SC_NPROCESSORS_ONLN); | |
config_threads = atoi(argv[1]); | |
config_slots = atoi(argv[2]); | |
config_loops = atoi(argv[3]); | |
total_loops = config_loops * config_slots * config_threads; | |
test = (struct test*) malloc(sizeof(struct test)); | |
test->size = config_slots; | |
test->config_loops = config_loops; | |
test->elements = (struct test_element*) malloc(sizeof(struct test_element) * test->size); | |
memset(test->elements, 0, sizeof(struct test_element) * test->size); | |
threads_data = (struct thread_data*) malloc(sizeof(struct thread_data) * config_threads); | |
memset(threads_data, 0, sizeof(struct thread_data) * config_threads); | |
for (i = 0; i < config_threads; i++) { | |
thread_data = threads_data + i; | |
thread_data->tid = i; | |
thread_data->test = test; | |
CPU_ZERO(&thread_data->cpuset); | |
CPU_SET(i % ncpu, &thread_data->cpuset); | |
pthread_create(&thread_data->thread, NULL, t_func, (void *)thread_data); | |
} | |
for (i = 0; i < config_threads; i++) { | |
pthread_join(threads_data[i].thread, NULL); | |
} | |
total_retries = 0; | |
total_duration = 0; | |
for (i = 0; i < config_threads; i++) { | |
struct thread_data *thread_data = threads_data + i; | |
total_retries += thread_data->aborts; | |
total_duration += thread_data->duration; | |
} | |
#ifndef CSV | |
#ifdef VERBOSE | |
printf("= HARDWARE\n"); | |
printf("\tONLINE CPUs: %d\n", ncpu); | |
printf("\n"); | |
#endif | |
printf("= CONFIG\n"); | |
printf("\tLOCK TYPE: " LOCK_TYPE "\n"); | |
printf("\tNUM THREADS: %d\n", config_threads); | |
printf("\tNUM SLOTS: %d\n", config_slots); | |
printf("\tNUM LOOPS: %d\n", config_loops); | |
printf("\tTOTAL LOOPS: %lu\n", total_loops); | |
printf("\n"); | |
printf("= THREAD RESULTS\n"); | |
#ifdef VERBOSE | |
for (i = 0; i < config_threads; i++) { | |
struct thread_data *thread_data = threads_data + i; | |
printf("\tTHREAD %d", thread_data->tid); | |
printf("\tCPU:"); | |
int cpuidx; | |
for (cpuidx = 0; cpuidx < ncpu; cpuidx++) { | |
if (CPU_ISSET(cpuidx, &thread_data->cpuset)) { | |
printf(" %d", cpuidx); | |
} | |
} | |
printf("\tRETRIES: %lu", thread_data->aborts); | |
printf("\tDURATION: %ld", thread_data->duration); | |
printf("\n"); | |
} | |
printf("\n"); | |
#endif | |
printf("\TOTAL RETRIES: %lu\n", total_retries); | |
printf("\tAVG RETRIES: %Lf\n", (long double)total_retries / (long double)config_threads); | |
printf("\tTOTAL DURATION (CLKS): %lu\n", total_duration); | |
printf("\tAVG DURATION (CLKS): %Lf\n", (long double)total_duration / (long double)config_threads); | |
printf("\n"); | |
#ifdef VERBOSE | |
printf("= MISMATCHES (only if non atomic, ie. HLE not supported)\n"); | |
for(i = 0; i < test->size; i++) { | |
if (test->elements[i].counter != config_threads * config_loops) { | |
printf("ELEMENT[%d]: %d\n", i, test->elements[i].counter); | |
} | |
} | |
#endif | |
#else | |
printf("%s,", LOCK_TYPE); | |
printf("%d,", config_threads); | |
printf("%d,", config_slots); | |
printf("%d,", config_loops); | |
printf("%lu,", total_loops); | |
printf("%lu,", total_retries); | |
printf("%Lf,", (long double)total_retries / (long double)config_threads); | |
printf("%lu,", total_duration); | |
printf("%Lf", (long double)total_duration / (long double)config_threads); | |
printf("\n"); | |
#endif | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment