Created
July 22, 2014 12:55
-
-
Save kumagi/2e5f7318b262d1f85ead to your computer and use it in GitHub Desktop.
byte_lockが速いとかいうので試してみた。やっつけ実装なのに60倍速い。
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// _GNU_SOURCE has to be defined before the first #include, otherwise glibc
// has already decided which declarations to expose and CPU_SET /
// sched_setaffinity may be missing.
#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif
#include <pthread.h> // pthread_create/join, barriers, rwlock
#include <sched.h> // sched_setaffinity, sched_yield
#include <stdint.h> // uint64_t
#include <stdio.h> // printf, perror
#include <stdlib.h> // exit, atoi, strtol
#include <sys/time.h> // gettimeofday
/**
 * Reader/writer spin lock in which every reader announces itself by setting
 * a private byte ("byte lock").
 *
 * Layout: `owner_tid` and the `slot` array share one 64-byte aligned region.
 *   - owner_tid : 0 while no writer holds or is acquiring the lock,
 *                 otherwise (tid + 1) of that writer. The +1 bias keeps
 *                 writer tid 0 distinguishable from "unlocked".
 *   - slot[tid] : 1 while reader `tid` holds (or is trying to take) the
 *                 read lock, 0 otherwise.
 *
 * `tid` must be unique per thread and satisfy 0 <= tid < sizeof(slot);
 * the methods do not range-check it.
 *
 * Protocol: a reader publishes its slot, then checks for a writer; a writer
 * publishes owner_tid, then waits until every slot is clear. Both sides need
 * a full (store-load) fence between their store and their check, otherwise
 * each may miss the other's store and both enter the critical section.
 */
struct byte_lock {
  byte_lock()
    : owner_tid(0) {
    for (unsigned i = 0; i < sizeof(slot); ++i) {
      slot[i] = 0;
    }
  }

  /** Acquire the lock for shared (read) access on behalf of thread `tid`. */
  void read_lock(int tid) {
    volatile char* const mine = &slot[tid];
    for (;;) {
      *mine = 1;
      // Full fence: the slot store must be globally visible before we
      // sample owner_tid. A compiler-only barrier is not enough because
      // CPUs (including x86) may reorder a store with a later load.
      __sync_synchronize();
      if (writer() == 0) {
        break;
      }
      // A writer is active or pending: withdraw so it can make progress.
      *mine = 0;
      sched_yield();
    }
    // Keep the critical section from being reordered above the check.
    __sync_synchronize();
  }

  /** Release a read lock previously taken with read_lock(tid). */
  void read_unlock(int tid) {
    // Everything done inside the critical section must complete before
    // the slot is seen as free by a writer.
    __sync_synchronize();
    *(volatile char*)&slot[tid] = 0;
  }

  /** Acquire the lock for exclusive (write) access on behalf of thread `tid`. */
  void write_lock(int tid) {
    // Biased by one so that tid 0 does not CAS 0 -> 0 and "acquire" nothing.
    const int tag = tid + 1;
    for (;;) {
      if (writer() != 0) {
        sched_yield();
        continue;
      }
      if (__sync_bool_compare_and_swap(&owner_tid, 0, tag)) {
        break;
      }
    }
    // The successful CAS is a full barrier, so owner_tid is published
    // before the slots are inspected. Readers arriving from now on back
    // off; wait for the ones that were already inside.
    while (readers_present()) {
      sched_yield();
    }
    __sync_synchronize();
  }

  /** Release the write lock. Must only be called by the current writer. */
  void write_unlock() {
    // Release barrier followed by a store of 0.
    __sync_lock_release(&owner_tid);
  }

  int owner_tid __attribute__((aligned(64)));
  char slot[64 - sizeof(int)];

private:
  /** Fresh (non-cached) read of owner_tid. */
  int writer() const {
    return *(const volatile int*)&owner_tid;
  }

  /**
   * True if any reader slot is currently set. Scans every byte of `slot`,
   * including the trailing ones that do not fill a whole 8-byte word, and
   * avoids reinterpreting the (4-byte offset) char array as uint64_t.
   */
  bool readers_present() const {
    const volatile char* bytes = slot;
    for (unsigned i = 0; i < sizeof(slot); ++i) {
      if (bytes[i] != 0) {
        return true;
      }
    }
    return false;
  }
};
/// ------- benchmark --------- | |
struct working_set { | |
int tid; | |
pthread_barrier_t* bar; | |
byte_lock* bl; | |
pthread_rwlock_t* lk; | |
}; | |
/**
 * Benchmark worker, used as a pthread start routine.
 *
 * Pins itself to CPU `tid`, waits on the shared barrier, then hammers the
 * lock under test: thread 0 performs 10,000,000 write lock/unlock pairs,
 * every other thread performs 1,000,000,000 read lock/unlock pairs. It then
 * waits on the barrier a second time so main can stop the clock.
 *
 * Lock selection is a compile-time switch: build with -DBENCH_USE_BYTE_LOCK
 * to exercise byte_lock; the default exercises pthread_rwlock.
 *
 * @param w  pointer to this thread's working_set
 * @return   always NULL
 */
void* work(void* w) {
  working_set* ws = (working_set*)w;
  const int tid = ws->tid;

  // Set thread affinity: pin this thread to core number `tid`.
  cpu_set_t mask;
  CPU_ZERO(&mask);
  CPU_SET(tid, &mask);
  if (sched_setaffinity(0, sizeof(mask), &mask) == -1) {
    perror("setaffinity:");
    exit(1);
  }

  // Wait for the measurement to start.
  pthread_barrier_wait(ws->bar);
  printf("thread[%d] started\n", tid);

  byte_lock* bl = ws->bl;
  pthread_rwlock_t* lk = ws->lk;
  // Only one of the two locks is used per build; silence the other.
#ifdef BENCH_USE_BYTE_LOCK
  (void)lk;
#else
  (void)bl;
#endif

  if (tid == 0) {
    // Writer thread.
    for (int i = 0; i < 10000000; ++i) {
#ifdef BENCH_USE_BYTE_LOCK
      bl->write_lock(tid);
      bl->write_unlock();
#else
      pthread_rwlock_wrlock(lk);
      pthread_rwlock_unlock(lk);
#endif
    }
  } else {
    // Reader threads.
    for (int i = 0; i < 1000000000; ++i) {
#ifdef BENCH_USE_BYTE_LOCK
      bl->read_lock(tid);
      bl->read_unlock(tid);
#else
      pthread_rwlock_rdlock(lk);
      pthread_rwlock_unlock(lk);
#endif
    }
  }

  printf("thread[%d] finished\n", tid);
  pthread_barrier_wait(ws->bar);
  // A pthread start routine must return a value; falling off the end of a
  // non-void function is undefined behaviour.
  return NULL;
}
// Returns the current time of day, in seconds, as reported by gettimeofday
// (seconds plus the microsecond field scaled by 1e-6).
double now() {
  struct timeval tv;
  gettimeofday(&tv, NULL);
  const double whole_seconds = (double)tv.tv_sec;
  const double microseconds = (double)tv.tv_usec;
  return whole_seconds + microseconds * 1e-6;
}
int main(int argc, char** argv) { | |
if (argc < 2) { // ./a.out 4 って書くと4スレッドになる | |
printf("set thread num\n"); | |
return 1; // 指定漏れがあった場合は自殺 | |
} | |
const int threads = atoi(argv[1]); | |
// 各スレッドやデータ構造の初期化 | |
pthread_t th[threads]; | |
pthread_barrier_t barrier; | |
working_set ws[threads]; | |
byte_lock bl; | |
pthread_rwlock_t lk; | |
pthread_rwlock_init(&lk, NULL); | |
pthread_barrier_init( &barrier, 0, threads + 1); | |
for (int i = 0; i < threads; ++i) { | |
ws[i].tid = i; | |
ws[i].bar = &barrier; | |
ws[i].bl = &bl; | |
ws[i].lk = &lk; | |
pthread_create(&th[i], NULL, work, &ws[i]); | |
} | |
// 測定開始 | |
const double start = now(); | |
pthread_barrier_wait(&barrier); | |
// 測定終了 | |
pthread_barrier_wait(&barrier); | |
const double finish = now(); | |
for (int i = 0; i < threads; ++i) { | |
pthread_join(th[i], NULL); | |
} | |
// 経過時間を表示 | |
printf("thread[%d] time: %lf sec\n", threads, finish - start); | |
return 0; | |
} |
pthread_rwlock を使った方
$ ./a.out 4
thread[2] started
thread[1] started
thread[3] started
thread[0] started
thread[0] finished
thread[1] finished
thread[2] finished
thread[3] finished
thread[4] time: 494.172233 sec
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
byte_lock を使った方