Created
December 6, 2014 03:02
-
-
Save brouhaha/62f2178d12ec04a81078 to your computer and use it in GitHub Desktop.
Test misaligned reads and writes spanning cache line boundaries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Test misaligned reads and writes spanning cache line boundaries
// 2014-12-05 Eric Smith <[email protected]>
// This program demonstrates that on an AMD FX-8350, and presumably
// other x86_64 processors, misaligned 64-bit reads and/or writes
// which span a cache line boundary are not atomic. For a
// "simultaneous" write and read of a misaligned value, the read may
// return a value that is partially the pre-write value, and partially
// the written value.
// At least on an AMD FX-8350, it appears that misaligned reads and writes
// that do not span cache line boundaries are atomic.
#include <inttypes.h> | |
#include <pthread.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <sysexits.h> | |
#include <unistd.h> | |
// Alignment (and half the size) of the buffer main() allocates; main()
// places the shared value at offset CACHE_LINE_SIZE - 1 within it.
// NOTE(review): presumably the target CPU's cache line size in bytes -- confirm.
#define CACHE_LINE_SIZE 64 | |
// Shutdown flag: cleared by main() before the threads start, set by main()
// after the test interval, and polled by every producer/consumer loop.
volatile bool stop; | |
// Location of the shared 64-bit value that producers write and consumers
// read; assigned once in main() before any thread is created.
volatile uint64_t *p; | |
void *producer(void *arg) | |
{ | |
int id = *((int *) arg); | |
uint64_t count = 0; | |
while (! stop) | |
{ | |
*p = (count & 0xff) * 0x0101010101010101ULL; | |
count++; | |
} | |
fprintf(stderr, "producer %d iterations: %" PRIu64 "d\n", id, count); | |
return 0; | |
} | |
void *consumer(void *arg) | |
{ | |
int id = *((int *) arg); | |
uint8_t b; | |
uint64_t v; | |
uint64_t count = 0; | |
while (! stop) | |
{ | |
v = *p; | |
b = v & 0xff; | |
if (v != (b * 0x0101010101010101ULL)) | |
{ | |
fprintf(stderr, "consumer %d iteration %" PRIu64 " read %" PRIx64 "\n", id, count, v); | |
//stop = true; | |
} | |
count++; | |
} | |
fprintf(stderr, "consumer %d iterations: %" PRIu64 "\n", id, count); | |
return 0; | |
} | |
// Capacity of the per-thread bookkeeping arrays below.
#define MAX_THREADS 100 | |
// Number of producer threads to start; assigned in main().
int producer_count; | |
// Number of consumer threads to start; assigned in main().
int consumer_count; | |
// Per-thread id values; main() passes the address of one element to each
// thread as its start-routine argument.
int thread_arg [MAX_THREADS]; | |
// Thread handles filled in by pthread_create(), indexed in creation order.
pthread_t thread [MAX_THREADS]; | |
int main(int argc, char **argv) | |
{ | |
int i; | |
int thread_num; | |
uint8_t *buf; | |
(void) argc; | |
(void) argv; | |
producer_count = 2; | |
consumer_count = 2; | |
if (posix_memalign((void **) & buf, CACHE_LINE_SIZE, 2 * CACHE_LINE_SIZE)) | |
{ | |
fprintf (stderr, "posix_memalign_failed\n"); | |
return EX_UNAVAILABLE; | |
} | |
// Construct pointer so that data value is split across cache line | |
p = (uint64_t *)(buf + CACHE_LINE_SIZE - 1); | |
// The same test can be performed with a misaligned value that does | |
// not cross a cache line by replacing the above assignment to p | |
// with this one. On an AMD FX-8350, testing that case reveals no | |
// failures. | |
// p = (uint64_t *)(buf + 1); | |
stop = false; | |
thread_num = 0; | |
for (i = 0; i < producer_count; i++) | |
{ | |
thread_arg [thread_num] = i; | |
int rc = pthread_create (& thread [thread_num], | |
NULL, | |
producer, | |
& thread_arg [thread_num]); | |
if (rc) | |
{ | |
fprintf (stderr, "producer pthread_create failed\n"); | |
return EX_UNAVAILABLE; | |
} | |
thread_num++; | |
} | |
for (i = 0; i < consumer_count; i++) | |
{ | |
thread_arg [thread_num] = i; | |
int rc = pthread_create (& thread [thread_num], | |
NULL, | |
consumer, | |
& thread_arg [thread_num]); | |
if (rc) | |
{ | |
fprintf (stderr, "consumer pthread_create failed\n"); | |
return EX_UNAVAILABLE; | |
} | |
thread_num++; | |
} | |
sleep (10); | |
stop = true; | |
sleep (1); | |
return EX_OK; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment