Skip to content

Instantly share code, notes, and snippets.

@lethern
Last active November 6, 2018 04:56
Show Gist options
  • Select an option

  • Save lethern/e610e8f83f42db7bbd03383918940083 to your computer and use it in GitHub Desktop.

Select an option

Save lethern/e610e8f83f42db7bbd03383918940083 to your computer and use it in GitHub Desktop.
// May still work with compiler optimizations enabled, but the results are more reliable with optimizations disabled.
#include <atomic>
#include <chrono>
#include <iostream>
#include <string>
#include <thread>
#include <Windows.h> // for Sleep()
typedef unsigned long long uint64;
constexpr int CPU_N = 4; // the more the longer it will wait (reduce n), 99+ will break the array
constexpr int n = 100000000;
struct shared_data {
volatile bool start = false;
volatile int arr[CPU_N * 101 + 5] ={ 0 };
// arr[i*100] = inc
// arr[i*100+1] = i (different cache)
// arr[2, 3, ...] = i1, i2, i3.. (same cache)
shared_data() {
for( int i=0; i < CPU_N; ++i )
arr[i * 100] = 1;
}
};
std::ostream& operator <<( std::ostream& os, shared_data& shared ) {
for( int i=0; i < CPU_N; ++i ) {
int val = shared.arr[i * 100 + 1];
if( val ) std::cout << " i" << (i + 1) << " " << val << " (" << val / 1000000 << " M)\n";
}
for( int j=2; j < CPU_N + 1; ++j ) {
int val = shared.arr[j];
if( val ) std::cout << " j" << j << " " << val << " (" << val / 1000000 << " M)\n";
}
return os;
}
struct thread_data {
shared_data* shared;
int number;
std::thread* t;
void init( shared_data* s, int n ) {
shared= s;
number= n;
}
void init_t( std::thread* _t ) {
t= _t;
}
};
void parallelized_func_1( thread_data* s ) {
auto& shared = *s->shared;
int index = s->number * 100 + 1;
while( !shared.start );
for( int i=0; i < n; ++i ) {
shared.arr[index]++;
}
}
void parallelized_func_2( thread_data* s ) {
auto& shared = *s->shared;
int index = s->number + 1;
while( !shared.start );
for( int i=0; i < n; ++i ) {
shared.arr[index]++;
}
}
void parallelized_func_3( thread_data* s ) {
auto& shared = *s->shared;
int index = s->number * 100 + 1;
while( !shared.start );
for( int i=0; i < n; ++i ) {
shared.arr[index]+= shared.arr[0];
}
}
void parallelized_func_4( thread_data* s ) {
auto& shared = *s->shared;
int index = s->number * 100 + 1;
int inc_index = s->number * 100;
while( !shared.start );
for( int i=0; i < n; ++i ) {
shared.arr[index]+= shared.arr[inc_index];
}
}
void test( void( *func )(thread_data* s), std::string description );
void thread_test()
{
test(parallelized_func_1, "Save on diff cache line");
test(parallelized_func_2, "Save on same cache line");
test(parallelized_func_3, "Save on diff cache line, read from same cache line");
test(parallelized_func_4, "Save on diff cache line, read from diff cache line");
}
void test( void( *func )(thread_data* s), std::string description )
{
shared_data shared;
thread_data data[CPU_N];
std::thread* t[CPU_N];
for( int i=0; i < CPU_N; ++i ) {
data[i].init( &shared, i );
t[i] = new std::thread( func, &data[i] );
data[i].init_t( t[i] );
}
shared.start= true;
Sleep( 50 );
std::cout << description << '\n' << shared << std::endl;
for( int i=0; i < CPU_N; ++i ) {
t[i]->join();
delete t[i];
}
}
/*
Each line shows one thread's counter as "value (value in millions M)": the number of additions that thread performed in the first 50 ms.
cpu=2
Save on diff cache line
i1 19610018 (19 M)
i2 20091855 (20 M)
Save on same cache line
i1 7833202 (7 M)
j2 8351601 (8 M)
Save on diff cache line, read from same cache line
i1 12648547 (12 M)
i2 5045249 (5 M)
Save on diff cache line, read from diff cache line
i1 4268625 (4 M)
i2 16202878 (16 M)
cpu=4
Save on diff cache line
i1 8232565 (8 M)
i2 8713713 (8 M)
i3 24232903 (24 M)
i4 19517495 (19 M)
Save on same cache line
i1 6144409 (6 M)
j2 5420106 (5 M)
j3 5421032 (5 M)
j4 6614839 (6 M)
Save on diff cache line, read from same cache line
i1 2639208 (2 M)
i2 9254534 (9 M)
i3 19828949 (19 M)
i4 12866018 (12 M)
Save on diff cache line, read from diff cache line
i1 23546340 (23 M)
i2 14772822 (14 M)
i3 16452606 (16 M)
i4 24126207 (24 M)
cpu=8
Save on diff cache line
i1 10473691 (10 M)
i2 6352630 (6 M)
i3 7592871 (7 M)
i4 10344223 (10 M)
i5 18681724 (18 M)
i6 13409743 (13 M)
i7 21532665 (21 M)
i8 21089546 (21 M)
Save on same cache line
i1 3459366 (3 M)
j2 2143212 (2 M)
j3 3425390 (3 M)
j4 5591536 (5 M)
j5 6009022 (6 M)
j6 2654168 (2 M)
j7 6207111 (6 M)
j8 4458036 (4 M)
Save on diff cache line, read from same cache line
i1 4561187 (4 M)
i2 5034110 (5 M)
i3 2119836 (2 M)
i4 4495064 (4 M)
i5 14529670 (14 M)
i6 7462099 (7 M)
i7 14228359 (14 M)
i8 14074283 (14 M)
Save on diff cache line, read from diff cache line
i1 5470949 (5 M)
i2 5348128 (5 M)
i3 8305827 (8 M)
i4 6489701 (6 M)
i5 15874218 (15 M)
i6 12931176 (12 M)
i7 15900159 (15 M)
i8 16156779 (16 M)
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment