Skip to content

Instantly share code, notes, and snippets.

@danielealbano
Last active February 5, 2018 00:57
Show Gist options
  • Save danielealbano/6b02e514a6f8f5f8601f99045627efa1 to your computer and use it in GitHub Desktop.
Save danielealbano/6b02e514a6f8f5f8601f99045627efa1 to your computer and use it in GitHub Desktop.
HLE example. Compile with: gcc72 -Wall -pthread -O3 -mhle -g -o hle main2.c. Define LOCK_MUTEX, LOCK_ATOMIC, or LOCK_HLE to select the lock implementation to compare; define VERBOSE for verbose output, or CSV for CSV-formatted output.
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <stdatomic.h>
#include <immintrin.h>
#include <unistd.h>
#include <time.h>
/*
 * Nanoseconds per second.
 * Deliberately has no trailing semicolon so the macro can appear anywhere
 * inside an expression, not only as its last token.
 */
#define BILLION 1000000000L

/* Lock selection: fall back to HLE when no lock type was requested. */
#ifndef LOCK_MUTEX
#ifndef LOCK_ATOMIC
#ifndef LOCK_HLE
#define LOCK_HLE
#endif
#endif
#endif

/*
 * Human-readable name of the selected lock, used in the report output.
 * `defined()` is used so that a value-less `#define LOCK_ATOMIC` works too.
 */
#ifdef LOCK_HLE
#define LOCK_TYPE "HLE"
#elif defined(LOCK_ATOMIC)
#define LOCK_TYPE "ATOMIC"
#else
#define LOCK_TYPE "MUTEX"
#endif
/* One lock-protected slot: a lock paired with the counter it guards. */
struct test_element {
#ifdef LOCK_MUTEX
/* Mutex build: taken/released through pthread_mutex_lock/unlock. */
pthread_mutex_t lock;
#else
/* Spin build: lock word; lock_acquire exchanges in 1, lock_release stores 0. */
u_int32_t lock;
#endif
/* Incremented by t_func between lock_acquire and lock_release. */
int counter;
/* NOTE(review): 64 is presumably the cache-line size, to keep elements from sharing a line -- confirm. */
} __attribute__((aligned(64)));
/* Benchmark state shared by every worker thread. */
struct test {
/* Number of entries in `elements` (main sets it from NUM_SLOTS). */
size_t size;
/* Number of full passes each thread makes over `elements` in t_func. */
u_int32_t config_loops;
/* Array of `size` lock-protected counters, allocated by main. */
struct test_element* elements;
} __attribute__((aligned(64)));
/* Per-thread input and result record; main owns one per worker thread. */
struct thread_data {
/* Thread index assigned by main; only 8 bits wide, so indices above 255 wrap. */
u_int8_t tid;
/* Incremented by lock_acquire for every failed attempt to take a lock word. */
u_int64_t aborts;
/* Handle filled in by pthread_create. */
pthread_t thread;
/* Affinity mask: main selects one CPU, t_func applies it and then overwrites
   it with the mask read back through pthread_getaffinity_np. */
cpu_set_t cpuset;
/* Shared benchmark state. */
struct test* test;
/* Elapsed time of the thread's work loop, written by t_func. */
u_int64_t duration;
};
/*
 * lock_acquire - take `lock`, waiting until it is available.
 *
 * aborts: per-caller counter. In the spinning builds it is incremented once
 *         for every failed attempt to take the lock word; the mutex build
 *         does not touch it.
 * lock:   the lock to take (a pthread mutex in the LOCK_MUTEX build,
 *         otherwise a lock word where 0 means free and 1 means held).
 */
#ifdef LOCK_MUTEX
void lock_acquire(u_int64_t *aborts, pthread_mutex_t *lock)
{
    (void)aborts; /* no retry accounting for mutexes */
    pthread_mutex_lock(lock);
}
#else
/*
 * Memory order for the acquiring exchange. It is kept as a macro so the
 * builtin always receives a compile-time constant, and it always expands to
 * something: plain acquire is the fallback whenever HLE is not selected.
 */
#ifdef LOCK_HLE
#define LOCK_ACQUIRE_ORDER (__ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)
#else
#define LOCK_ACQUIRE_ORDER __ATOMIC_ACQUIRE
#endif
void lock_acquire(u_int64_t *aborts, u_int32_t *lock)
{
    /* The exchange returns the previous value: non-zero means someone else holds it. */
    while (__atomic_exchange_n(lock, 1, LOCK_ACQUIRE_ORDER) != 0) {
        *aborts = *aborts + 1;
        _mm_pause();
    }
}
#endif
/*
 * lock_release - release a lock previously taken with lock_acquire.
 *
 * lock: the lock to release (a pthread mutex in the LOCK_MUTEX build,
 *       otherwise a lock word that is reset to 0).
 */
#ifdef LOCK_MUTEX
void lock_release(pthread_mutex_t *lock)
{
    pthread_mutex_unlock(lock);
}
#else
/* Memory order for the releasing store; must be a compile-time constant. */
#ifdef LOCK_HLE
#define LOCK_RELEASE_ORDER (__ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE)
#else
#define LOCK_RELEASE_ORDER __ATOMIC_RELEASE
#endif
void lock_release(u_int32_t *lock)
{
    /* The _n form stores the value directly, so no differently-typed temporary is needed. */
    __atomic_store_n(lock, 0, LOCK_RELEASE_ORDER);
}
#endif
/*
 * t_func - worker thread entry point.
 *
 * userdata: pointer to this thread's struct thread_data.
 *
 * Pins the thread to the CPU set requested in thread_data->cpuset, then makes
 * test->config_loops passes over every element of the shared test,
 * incrementing each counter under its lock. On return,
 * thread_data->duration holds the elapsed time of the work loop in
 * nanoseconds and thread_data->cpuset holds the affinity mask actually in
 * effect. Always returns NULL.
 */
void* t_func(void* userdata)
{
    /* Local constant instead of a macro so the arithmetic below is self-contained. */
    const long long ns_per_sec = 1000000000LL;
    struct thread_data *thread_data = userdata;
    struct test *test = thread_data->test;
    struct timespec begin, end;
    pthread_t thread = pthread_self();
    long long elapsed_ns;

    if (pthread_setaffinity_np(thread, sizeof(cpu_set_t), &thread_data->cpuset) != 0) {
        printf("Failed to set cpu affinity for thread %d\n", thread_data->tid);
    }
    /* Read the mask back so the report shows where the thread really ran. */
    if (pthread_getaffinity_np(thread, sizeof(cpu_set_t), &thread_data->cpuset) != 0) {
        printf("Failed to get cpu affinity for thread %d\n", thread_data->tid);
    }

    /* A monotonic clock cannot jump backwards if the wall-clock time is adjusted. */
    clock_gettime(CLOCK_MONOTONIC, &begin);
    for (u_int32_t loop = 0; loop < test->config_loops; loop++) {
        for (size_t slot = 0; slot < test->size; slot++) {
            struct test_element *element = &test->elements[slot];

            lock_acquire(&thread_data->aborts, &element->lock);
            element->counter++;
            lock_release(&element->lock);
        }
    }
    clock_gettime(CLOCK_MONOTONIC, &end);

    /*
     * Combine both timespec fields in nanoseconds. Dividing the nanosecond
     * delta by one billion in integer arithmetic would always discard it.
     */
    elapsed_ns = (long long)(end.tv_sec - begin.tv_sec) * ns_per_sec
               + (long long)(end.tv_nsec - begin.tv_nsec);
    thread_data->duration = elapsed_ns > 0 ? (u_int64_t)elapsed_ns : 0;

    return NULL;
}
int main(int argc, char** argv) {
int i;
u_int32_t ncpu, config_threads, config_slots, config_loops;
u_int64_t total_loops;
u_int64_t total_retries;
struct test* test;
struct thread_data* threads_data, *thread_data;
clock_t total_duration;
if (argc != 4) {
printf("Invalid arguments!\n");
printf("%s NUM_THREADS NUM_SLOTS NUM_LOOPS!\n", argv[0]);
return 1;
}
ncpu = sysconf(_SC_NPROCESSORS_ONLN);
config_threads = atoi(argv[1]);
config_slots = atoi(argv[2]);
config_loops = atoi(argv[3]);
total_loops = config_loops * config_slots * config_threads;
test = (struct test*) malloc(sizeof(struct test));
test->size = config_slots;
test->config_loops = config_loops;
test->elements = (struct test_element*) malloc(sizeof(struct test_element) * test->size);
memset(test->elements, 0, sizeof(struct test_element) * test->size);
threads_data = (struct thread_data*) malloc(sizeof(struct thread_data) * config_threads);
memset(threads_data, 0, sizeof(struct thread_data) * config_threads);
for (i = 0; i < config_threads; i++) {
thread_data = threads_data + i;
thread_data->tid = i;
thread_data->test = test;
CPU_ZERO(&thread_data->cpuset);
CPU_SET(i % ncpu, &thread_data->cpuset);
pthread_create(&thread_data->thread, NULL, t_func, (void *)thread_data);
}
for (i = 0; i < config_threads; i++) {
pthread_join(threads_data[i].thread, NULL);
}
total_retries = 0;
total_duration = 0;
for (i = 0; i < config_threads; i++) {
struct thread_data *thread_data = threads_data + i;
total_retries += thread_data->aborts;
total_duration += thread_data->duration;
}
#ifndef CSV
#ifdef VERBOSE
printf("= HARDWARE\n");
printf("\tONLINE CPUs: %d\n", ncpu);
printf("\n");
#endif
printf("= CONFIG\n");
printf("\tLOCK TYPE: " LOCK_TYPE "\n");
printf("\tNUM THREADS: %d\n", config_threads);
printf("\tNUM SLOTS: %d\n", config_slots);
printf("\tNUM LOOPS: %d\n", config_loops);
printf("\tTOTAL LOOPS: %lu\n", total_loops);
printf("\n");
printf("= THREAD RESULTS\n");
#ifdef VERBOSE
for (i = 0; i < config_threads; i++) {
struct thread_data *thread_data = threads_data + i;
printf("\tTHREAD %d", thread_data->tid);
printf("\tCPU:");
int cpuidx;
for (cpuidx = 0; cpuidx < ncpu; cpuidx++) {
if (CPU_ISSET(cpuidx, &thread_data->cpuset)) {
printf(" %d", cpuidx);
}
}
printf("\tRETRIES: %lu", thread_data->aborts);
printf("\tDURATION: %ld", thread_data->duration);
printf("\n");
}
printf("\n");
#endif
printf("\TOTAL RETRIES: %lu\n", total_retries);
printf("\tAVG RETRIES: %Lf\n", (long double)total_retries / (long double)config_threads);
printf("\tTOTAL DURATION (CLKS): %lu\n", total_duration);
printf("\tAVG DURATION (CLKS): %Lf\n", (long double)total_duration / (long double)config_threads);
printf("\n");
#ifdef VERBOSE
printf("= MISMATCHES (only if non atomic, ie. HLE not supported)\n");
for(i = 0; i < test->size; i++) {
if (test->elements[i].counter != config_threads * config_loops) {
printf("ELEMENT[%d]: %d\n", i, test->elements[i].counter);
}
}
#endif
#else
printf("%s,", LOCK_TYPE);
printf("%d,", config_threads);
printf("%d,", config_slots);
printf("%d,", config_loops);
printf("%lu,", total_loops);
printf("%lu,", total_retries);
printf("%Lf,", (long double)total_retries / (long double)config_threads);
printf("%lu,", total_duration);
printf("%Lf", (long double)total_duration / (long double)config_threads);
printf("\n");
#endif
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment