Last active
November 6, 2018 04:56
-
-
Save lethern/e610e8f83f42db7bbd03383918940083 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // might work with compiler optimization turned on, but better when disabled | |
#include <atomic>
#include <iostream>
#include <string>
#include <thread>
#include <Windows.h> // for Sleep()
// Alias for a 64-bit-capable unsigned integer.
using uint64 = unsigned long long;

// Number of worker threads started for each test.
// More threads make a run take longer (reduce n to compensate). Per the
// original note, values of 99 or more break the array layout in shared_data.
constexpr int CPU_N = 4;

// Number of loop iterations every worker thread performs.
constexpr int n = 100000000;
| struct shared_data { | |
| volatile bool start = false; | |
| volatile int arr[CPU_N * 101 + 5] ={ 0 }; | |
| // arr[i*100] = inc | |
| // arr[i*100+1] = i (different cache) | |
| // arr[2, 3, ...] = i1, i2, i3.. (same cache) | |
| shared_data() { | |
| for( int i=0; i < CPU_N; ++i ) | |
| arr[i * 100] = 1; | |
| } | |
| }; | |
| std::ostream& operator <<( std::ostream& os, shared_data& shared ) { | |
| for( int i=0; i < CPU_N; ++i ) { | |
| int val = shared.arr[i * 100 + 1]; | |
| if( val ) std::cout << " i" << (i + 1) << " " << val << " (" << val / 1000000 << " M)\n"; | |
| } | |
| for( int j=2; j < CPU_N + 1; ++j ) { | |
| int val = shared.arr[j]; | |
| if( val ) std::cout << " j" << j << " " << val << " (" << val / 1000000 << " M)\n"; | |
| } | |
| return os; | |
| } | |
| struct thread_data { | |
| shared_data* shared; | |
| int number; | |
| std::thread* t; | |
| void init( shared_data* s, int n ) { | |
| shared= s; | |
| number= n; | |
| } | |
| void init_t( std::thread* _t ) { | |
| t= _t; | |
| } | |
| }; | |
| void parallelized_func_1( thread_data* s ) { | |
| auto& shared = *s->shared; | |
| int index = s->number * 100 + 1; | |
| while( !shared.start ); | |
| for( int i=0; i < n; ++i ) { | |
| shared.arr[index]++; | |
| } | |
| } | |
| void parallelized_func_2( thread_data* s ) { | |
| auto& shared = *s->shared; | |
| int index = s->number + 1; | |
| while( !shared.start ); | |
| for( int i=0; i < n; ++i ) { | |
| shared.arr[index]++; | |
| } | |
| } | |
| void parallelized_func_3( thread_data* s ) { | |
| auto& shared = *s->shared; | |
| int index = s->number * 100 + 1; | |
| while( !shared.start ); | |
| for( int i=0; i < n; ++i ) { | |
| shared.arr[index]+= shared.arr[0]; | |
| } | |
| } | |
| void parallelized_func_4( thread_data* s ) { | |
| auto& shared = *s->shared; | |
| int index = s->number * 100 + 1; | |
| int inc_index = s->number * 100; | |
| while( !shared.start ); | |
| for( int i=0; i < n; ++i ) { | |
| shared.arr[index]+= shared.arr[inc_index]; | |
| } | |
| } | |
| void test( void( *func )(thread_data* s), std::string description ); | |
| void thread_test() | |
| { | |
| test(parallelized_func_1, "Save on diff cache line"); | |
| test(parallelized_func_2, "Save on same cache line"); | |
| test(parallelized_func_3, "Save on diff cache line, read from same cache line"); | |
| test(parallelized_func_4, "Save on diff cache line, read from diff cache line"); | |
| } | |
| void test( void( *func )(thread_data* s), std::string description ) | |
| { | |
| shared_data shared; | |
| thread_data data[CPU_N]; | |
| std::thread* t[CPU_N]; | |
| for( int i=0; i < CPU_N; ++i ) { | |
| data[i].init( &shared, i ); | |
| t[i] = new std::thread( func, &data[i] ); | |
| data[i].init_t( t[i] ); | |
| } | |
| shared.start= true; | |
| Sleep( 50 ); | |
| std::cout << description << '\n' << shared << std::endl; | |
| for( int i=0; i < CPU_N; ++i ) { | |
| t[i]->join(); | |
| delete t[i]; | |
| } | |
| } | |
| /* | |
| Each number (and its value in millions, "M") is the sum for the given thread, i.e. the additions it performed in 50 ms | |
| cpu=2 | |
| Save on diff cache line | |
| i1 19610018 (19 M) | |
| i2 20091855 (20 M) | |
| Save on same cache line | |
| i1 7833202 (7 M) | |
| j2 8351601 (8 M) | |
| Save on diff cache line, read from same cache line | |
| i1 12648547 (12 M) | |
| i2 5045249 (5 M) | |
| Save on diff cache line, read from diff cache line | |
| i1 4268625 (4 M) | |
| i2 16202878 (16 M) | |
| cpu=4 | |
| Save on diff cache line | |
| i1 8232565 (8 M) | |
| i2 8713713 (8 M) | |
| i3 24232903 (24 M) | |
| i4 19517495 (19 M) | |
| Save on same cache line | |
| i1 6144409 (6 M) | |
| j2 5420106 (5 M) | |
| j3 5421032 (5 M) | |
| j4 6614839 (6 M) | |
| Save on diff cache line, read from same cache line | |
| i1 2639208 (2 M) | |
| i2 9254534 (9 M) | |
| i3 19828949 (19 M) | |
| i4 12866018 (12 M) | |
| Save on diff cache line, read from diff cache line | |
| i1 23546340 (23 M) | |
| i2 14772822 (14 M) | |
| i3 16452606 (16 M) | |
| i4 24126207 (24 M) | |
| cpu=8 | |
| Save on diff cache line | |
| i1 10473691 (10 M) | |
| i2 6352630 (6 M) | |
| i3 7592871 (7 M) | |
| i4 10344223 (10 M) | |
| i5 18681724 (18 M) | |
| i6 13409743 (13 M) | |
| i7 21532665 (21 M) | |
| i8 21089546 (21 M) | |
| Save on same cache line | |
| i1 3459366 (3 M) | |
| j2 2143212 (2 M) | |
| j3 3425390 (3 M) | |
| j4 5591536 (5 M) | |
| j5 6009022 (6 M) | |
| j6 2654168 (2 M) | |
| j7 6207111 (6 M) | |
| j8 4458036 (4 M) | |
| Save on diff cache line, read from same cache line | |
| i1 4561187 (4 M) | |
| i2 5034110 (5 M) | |
| i3 2119836 (2 M) | |
| i4 4495064 (4 M) | |
| i5 14529670 (14 M) | |
| i6 7462099 (7 M) | |
| i7 14228359 (14 M) | |
| i8 14074283 (14 M) | |
| Save on diff cache line, read from diff cache line | |
| i1 5470949 (5 M) | |
| i2 5348128 (5 M) | |
| i3 8305827 (8 M) | |
| i4 6489701 (6 M) | |
| i5 15874218 (15 M) | |
| i6 12931176 (12 M) | |
| i7 15900159 (15 M) | |
| i8 16156779 (16 M) | |
| */ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment