Last active
August 29, 2015 14:17
-
-
Save janisz/41ffe95c2f8c899ce48b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstddef>
#include <cstdio>
#include <cstdlib>

#include <cuda_runtime_api.h>

#include "benchmark/benchmark.h"
// Registers benchmark function `x` over the argument range 8 .. 8<<20
// (8 bytes up to 8 MiB; the argument is used as an allocation size by the
// benchmarks in this file). The expansion deliberately has no trailing
// semicolon so callers can chain further options, e.g. ->ThreadPerCpu().
// The backslash must be the last character on its line for the continuation
// to work.
#define BASIC_BENCHMARK_TEST(x) \
    BENCHMARK(x)->Range(8, 8<<20)
// Benchmarks one host malloc()/free() round trip per iteration.
//
// state.range_x() supplies the requested allocation size in bytes.
void BM_malloc(benchmark::State& state) {
    while (state.KeepRunning()) {
        // Route the pointer through a volatile object: the store and the
        // reload are observable, so an optimizing build cannot prove the
        // allocation is unused and drop the malloc/free pair, which would
        // leave the loop timing nothing.
        void* volatile block = std::malloc(state.range_x());
        // free(NULL) is a no-op, so a failed allocation is harmless here.
        std::free(block);
    }
}
// Register BM_malloc twice: once with default threading and once with
// ThreadPerCpu() chained on.
BASIC_BENCHMARK_TEST(BM_malloc);
BASIC_BENCHMARK_TEST(BM_malloc)->ThreadPerCpu();
void BM_cuda_malloc(benchmark::State& state) { | |
void * ptr = 0; | |
while (state.KeepRunning()) { | |
cudaMalloc( &ptr, state.range_x() ); | |
cudaFree(ptr); | |
} | |
} | |
BASIC_BENCHMARK_TEST(BM_cuda_malloc); | |
BASIC_BENCHMARK_TEST(BM_cuda_malloc)->ThreadPerCpu(); | |
BENCHMARK_MAIN() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ nvidia-smi
Fri Mar 13 17:04:07 2015
+------------------------------------------------------+
| NVIDIA-SMI 346.35 Driver Version: 346.35 |
|-------------------------------+----------------------+----------------------+
| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |
|===============================+======================+======================|
| 0 GeForce GTX 580 Off | 0000:03:00.0 N/A | N/A |
| 40% 38C P12 N/A / N/A | 68MiB / 1535MiB | N/A Default |
+-------------------------------+----------------------+----------------------+
+-----------------------------------------------------------------------------+
| Processes: GPU Memory |
| GPU PID Type Process name Usage |
|=============================================================================|
| 0 C Not Supported |
+-----------------------------------------------------------------------------+
$ lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 8
On-line CPU(s) list: 0-7
Thread(s) per core: 2
Core(s) per socket: 4
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 42
Model name: Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz
Stepping: 7
CPU MHz: 3437.984
CPU max MHz: 3800.0000
CPU min MHz: 1600.0000
BogoMIPS: 6824.24
Virtualization: VT-x
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 8192K
NUMA node0 CPU(s): 0-7
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ make && ./basic_test
[ 75%] Built target benchmark
Scanning dependencies of target basic_test
[ 83%] Building CXX object test/CMakeFiles/basic_test.dir/basic_test.cc.o
Linking CXX executable basic_test
[ 83%] Built target basic_test
[ 91%] Built target benchmark_test
[100%] Built target filter_test
Run on (8 X 3800 MHz CPUs)
2015/03/13-16:48:29
***WARNING*** CPU scaling is enabled, the benchmark timings may be noisy
Build Type: DEBUG
Benchmark Time(ns) CPU(ns) Iterations
------------------------------------------------------------------------
BM_malloc/8 26 25 26923077
BM_malloc/64 24 27 25000000
BM_malloc/512 42 47 14583333
BM_malloc/4k 34 38 20192347
BM_malloc/32k 35 39 19626205
BM_malloc/256k 46 40 19444444
BM_malloc/2M 41 39 19444444
BM_malloc/8M 38 36 19444444
BM_malloc/8/threads:8 25 157 3294120
BM_malloc/64/threads:8 23 191 3906960
BM_malloc/512/threads:8 11 84 10500064
BM_malloc/4k/threads:8 11 86 8399960
BM_malloc/32k/threads:8 11 89 9882472
BM_malloc/256k/threads:8 5804 27106 31112
BM_malloc/2M/threads:8 780 6250 80000
BM_malloc/8M/threads:8 818 7115 120864
BM_cuda_malloc/8 106344 118292 5833
BM_cuda_malloc/64 106442 118292 5833
BM_cuda_malloc/512 132760 118292 5833
BM_cuda_malloc/4k 106179 118333 6000
BM_cuda_malloc/32k 135404 118292 5833
BM_cuda_malloc/256k 120524 118333 6000
BM_cuda_malloc/2M 106714 118864 5833
BM_cuda_malloc/8M 140644 123326 5676
BM_cuda_malloc/8/threads:8 18159 20256 48216
BM_cuda_malloc/64/threads:8 7926 12114 43752
BM_cuda_malloc/512/threads:8 16220 18333 56000
BM_cuda_malloc/4k/threads:8 18136 23111 30000
BM_cuda_malloc/32k/threads:8 21948 23430 52496
BM_cuda_malloc/256k/threads:8 10718 13875 62224
BM_cuda_malloc/2M/threads:8 172934 219458 3296
BM_cuda_malloc/8M/threads:8 171326 217059 3056
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment