Skip to content

Instantly share code, notes, and snippets.

@allanmac
Last active June 17, 2022 17:29
Show Gist options
  • Save allanmac/93868ee6be78f9d1e9f5704e6e62db43 to your computer and use it in GitHub Desktop.
Save allanmac/93868ee6be78f9d1e9f5704e6e62db43 to your computer and use it in GitHub Desktop.
Benchmark CUB Radix Sort with uniformly random data
//
// Build:
//
// nvcc -lcurand --generate-code arch=compute_50,code=compute_50 --generate-code arch=compute_75,code=compute_75 -D CUB_SORT_TYPE=uint32_t -o sort_cub_32 cub_sort.cu
// nvcc -lcurand --generate-code arch=compute_50,code=compute_50 --generate-code arch=compute_75,code=compute_75 -D CUB_SORT_TYPE=uint64_t -o sort_cub_64 cub_sort.cu
//
#define THRUST_IGNORE_CUB_VERSION_CHECK
#include <curand.h>
#include <cub/cub.cuh>
//
//
//
#include <stdbool.h>
//
// Report a failed CUDA runtime call and optionally terminate.
//
//   code  : status returned by the CUDA runtime call
//   file  : source file of the call site (normally __FILE__)
//   line  : source line of the call site (normally __LINE__)
//   abort : when true, reset the device and exit the process using the
//           CUDA error code as the exit status
//
// Does nothing when `code` is cudaSuccess.
//
static
void
cuda_assert(const cudaError_t code, const char* const file, const int line, const bool abort)
{
  if (code != cudaSuccess)
    {
      fprintf(stderr,"cuda_assert: %s %s %d\n",cudaGetErrorString(code),file,line);

      if (abort)
        {
          cudaDeviceReset();
          exit(code);
        }
    }
}

//
// cuda(Foo(args)) expands to a checked call of cudaFoo(args).
//
// The body is wrapped in do { } while (0) so that `cuda(...);` is exactly
// one statement and stays well-formed inside an unbraced if/else.
//
#define cuda(...) do { cuda_assert((cuda##__VA_ARGS__), __FILE__, __LINE__, true); } while (0)
//
//
//
// Key type being sorted. Can be overridden at build time with
// -D CUB_SORT_TYPE=<type> (see the build lines at the top of the file).
#ifndef CUB_SORT_TYPE
#define CUB_SORT_TYPE uint64_t
#endif
// Default number of untimed warm-up sorts when not given on the command line.
#define CUB_SORT_WARMUP 100
// Default number of timed sorts when not given on the command line.
#define CUB_SORT_TRIALS 1000
//
//
//
//
// Run one radix sort of `count` keys from vin_d into vout_d and fold its
// elapsed time into the running statistics.
//
//   tmp, tmp_size : temporary storage previously sized for this sort
//   start, end    : events used to bracket the sort on stream 0
//   min_ms        : updated to the smaller of its value and this run
//   max_ms        : updated to the larger of its value and this run
//   elapsed_ms    : this run's time is added to it
//
// Any CUDA or CUB failure is reported through cuda_assert(), which aborts.
//
static
void
sort(uint32_t count,
     CUB_SORT_TYPE * vin_d,
     CUB_SORT_TYPE * vout_d,
     void * tmp,
     size_t & tmp_size,
     cudaEvent_t start,
     cudaEvent_t end,
     float * min_ms,
     float * max_ms,
     float * elapsed_ms)
{
  cuda(EventRecord(start,0));

  // Keep the status but do not act on it yet so that nothing extra sits
  // between the sort and the `end` event.
  const cudaError_t sort_err = cub::DeviceRadixSort::SortKeys(tmp,tmp_size,vin_d,vout_d,count);

  cuda(EventRecord(end,0));
  cuda(EventSynchronize(end));

  // A failed sort would otherwise be recorded as a (meaningless) timing.
  cuda_assert(sort_err,__FILE__,__LINE__,true);

  float t_ms;

  cuda(EventElapsedTime(&t_ms,start,end));

  *min_ms      = min(*min_ms,t_ms);
  *max_ms      = max(*max_ms,t_ms);
  *elapsed_ms += t_ms;
}
//
//
//
//
// Abort with a diagnostic if a cuRAND call did not succeed.
//
static
void
curand_assert(const curandStatus_t status, const char* const file, const int line)
{
  if (status != CURAND_STATUS_SUCCESS)
    {
      fprintf(stderr,"curand_assert: status %d %s %d\n",(int)status,file,line);
      cudaDeviceReset();
      exit(EXIT_FAILURE);
    }
}

//
// Benchmark sorting `count` random keys of type CUB_SORT_TYPE.
//
//   props  : properties of the device in use (name and SM count are printed)
//   count  : number of keys to sort
//   warmup : number of untimed sorts run before measuring
//   trials : number of timed sorts
//
// Prints one comma-separated result line to stdout. The same input buffer
// is sorted on every iteration; the sorted keys go to a separate output
// buffer. Any CUDA, CUB or cuRAND failure aborts the process.
//
static
void
bench(const struct cudaDeviceProp* const props,
      const uint32_t count,
      const uint32_t warmup,
      const uint32_t trials)
{
  //
  // only 32-bit and 64-bit keys are supported
  //
  if ((sizeof(CUB_SORT_TYPE) != sizeof(unsigned int)) &&
      (sizeof(CUB_SORT_TYPE) != sizeof(unsigned long long)))
    {
      fprintf(stderr,"bench: unsupported key size %u\n",(unsigned)sizeof(CUB_SORT_TYPE));
      exit(EXIT_FAILURE);
    }

  //
  // allocate
  //
  size_t const vin_size = sizeof(CUB_SORT_TYPE) * count;

  CUB_SORT_TYPE * vin_d;
  CUB_SORT_TYPE * vout_d;

  cuda(Malloc(&vin_d, vin_size));
  cuda(Malloc(&vout_d,vin_size));

  //
  // fill with random values
  //
  // The buffer is filled as a sequence of 32-bit words for both key widths.
  // curandGenerateLongLong() is documented as valid only for 64-bit
  // quasi-random generators, so it cannot be used with XORWOW; two 32-bit
  // words per key give a 64-bit key with every bit random.
  //
  curandGenerator_t prng;

  curand_assert(curandCreateGenerator(&prng,CURAND_RNG_PSEUDO_XORWOW),__FILE__,__LINE__);
  curand_assert(curandSetPseudoRandomGeneratorSeed(prng,0xCAFEBABE),__FILE__,__LINE__);

  size_t const vin_words = vin_size / sizeof(unsigned int);

  if (vin_words > 0)
    {
      curand_assert(curandGenerate(prng,(unsigned int*)vin_d,vin_words),__FILE__,__LINE__);
    }

  //
  // size and allocate the temp array
  //
  void * tmp      = NULL;
  size_t tmp_size = 0;

  // With a NULL temp pointer this call only reports the required size.
  cuda_assert(cub::DeviceRadixSort::SortKeys(NULL,tmp_size,vin_d,vout_d,count),
              __FILE__,__LINE__,true);

  cuda(Malloc(&tmp,tmp_size));

  //
  // benchmark
  //
  cudaEvent_t start, end;

  cuda(EventCreate(&start));
  cuda(EventCreate(&end));

  float min_ms     = FLT_MAX;
  float max_ms     = 0.0f;
  float elapsed_ms = 0.0f;

  for (uint32_t ii=0; ii<warmup; ii++)
    {
      sort(count,vin_d,vout_d,tmp,tmp_size,start,end,
           &min_ms,
           &max_ms,
           &elapsed_ms);
    }

  // discard the warm-up statistics
  min_ms     = FLT_MAX;
  max_ms     = 0.0f;
  elapsed_ms = 0.0f;

  for (uint32_t ii=0; ii<trials; ii++)
    {
      sort(count,vin_d,vout_d,tmp,tmp_size,start,end,
           &min_ms,
           &max_ms,
           &elapsed_ms);
    }

  cuda(EventDestroy(start));
  cuda(EventDestroy(end));

  //
  // release everything acquired above, including the generator
  //
  curand_assert(curandDestroyGenerator(prng),__FILE__,__LINE__);

  cuda(Free(tmp));
  cuda(Free(vout_d));
  cuda(Free(vin_d));

  //
  // report
  //
#define STRINGIFY2(s) #s
#define STRINGIFY(s) STRINGIFY2(s)

  fprintf(stdout,"%s, %u, %u.%u.%u.%u, %s, %u, %u, %u, %.3f, %.3f, %.3f, %.3f, %.3f, %.3f\n",
          props->name,
          props->multiProcessorCount,
          CUB_MAJOR_VERSION,
          CUB_MINOR_VERSION,
          CUB_SUBMINOR_VERSION,
          CUB_PATCH_NUMBER,
          STRINGIFY(CUB_SORT_TYPE),
          count,
          warmup,
          trials,
          elapsed_ms,
          (double)elapsed_ms / trials,
          (double)min_ms,
          (double)max_ms,
          (double)count * trials / (1000.0 * elapsed_ms),
          (double)count          / (1000.0 * min_ms));
}
//
//
//
//
// Entry point: selects a device, parses the sweep parameters and runs
// bench() once per key count in [count_lo, count_hi].
//
int
main(int argc, char** argv)
{
  // argc < 2 (rather than == 1) also covers an empty argv
  const int32_t device = (argc < 2) ? 0 : atoi(argv[1]);

  struct cudaDeviceProp props;

  cuda(GetDeviceProperties(&props,device));

  printf("%s (%2d)\n",props.name,props.multiProcessorCount);

  cuda(SetDevice(device));

  //
  // Usage:
  //
  // $ sort_cub_xx [ device [ count_lo [ count_hi [ count_step [ trials [ warmup ] ] ] ] ] ]
  //
  const uint32_t count_lo   = argc <= 2 ? 131072          : strtoul(argv[2],NULL,0);
  const uint32_t count_hi   = argc <= 3 ? 8388608         : strtoul(argv[3],NULL,0);
  const uint32_t count_step = argc <= 4 ? 131072          : strtoul(argv[4],NULL,0);
  const uint32_t trials     = argc <= 5 ? CUB_SORT_TRIALS : strtoul(argv[5],NULL,0);
  const uint32_t warmup     = argc <= 6 ? CUB_SORT_WARMUP : strtoul(argv[6],NULL,0);

  // A zero step would never advance the sweep below.
  if (count_step == 0)
    {
      fprintf(stderr,"error: count_step must be greater than zero\n");
      cudaDeviceReset();
      return EXIT_FAILURE;
    }

  //
  // LABELS
  //
  fprintf(stdout,
          "Device, "
          "Multiprocessors, "
          "CUB, "
          "Type, "
          "Keys, "
          "Warmup, "
          "Trials, "
          "Total Msecs, "
          "Avg. Msecs, "
          "Min Msecs, "
          "Max Msecs, "
          "Avg. Mkeys/s, "
          "Max. Mkeys/s\n");

  //
  // SORT
  //
  // The counter is 64 bits wide so that count + count_step cannot wrap
  // around when count_hi is close to UINT32_MAX; every value passed to
  // bench() is <= count_hi and therefore fits in 32 bits.
  //
  for (uint64_t count=count_lo; count<=count_hi; count+=count_step)
    {
      bench(&props,(uint32_t)count,warmup,trials);
    }

  //
  // RESET
  //
  cuda(DeviceReset());

  return 0;
}
@allanmac
Copy link
Author

$ ./sort_cub_64
NVIDIA GeForce RTX 2060 (30)
Device, Multiprocessors, CUB, Type, Keys, Warmup, Trials, Total Msecs, Avg. Msecs, Min Msecs, Max Msecs, Avg. Mkeys/s, Max. Mkeys/s
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 131072, 100, 1000, 115.693, 0.116, 0.113, 0.128, 1132.928, 1163.306
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 262144, 100, 1000, 182.972, 0.183, 0.179, 0.193, 1432.697, 1468.364
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 393216, 100, 1000, 276.936, 0.277, 0.272, 0.281, 1419.879, 1443.100
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 524288, 100, 1000, 347.213, 0.347, 0.342, 0.380, 1509.990, 1534.945
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 655360, 100, 1000, 421.408, 0.421, 0.415, 0.439, 1555.169, 1579.029
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 786432, 100, 1000, 488.591, 0.489, 0.482, 0.522, 1609.591, 1630.898
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 917504, 100, 1000, 563.283, 0.563, 0.556, 0.581, 1628.852, 1650.092
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1048576, 100, 1000, 632.730, 0.633, 0.622, 0.675, 1657.224, 1684.470
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1179648, 100, 1000, 708.006, 0.708, 0.698, 0.820, 1666.155, 1689.304
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1310720, 100, 1000, 775.274, 0.775, 0.764, 0.808, 1690.654, 1715.746
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1441792, 100, 1000, 839.667, 0.840, 0.828, 0.872, 1717.100, 1741.766
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1572864, 100, 1000, 922.282, 0.922, 0.909, 0.956, 1705.404, 1729.547
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1703936, 100, 1000, 989.710, 0.990, 0.978, 1.024, 1721.651, 1742.066
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1835008, 100, 1000, 1065.263, 1.065, 1.050, 1.099, 1722.587, 1748.186
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 1966080, 100, 1000, 1133.762, 1.134, 1.117, 1.165, 1734.120, 1759.500
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 2097152, 100, 1000, 1211.893, 1.212, 1.197, 1.249, 1730.476, 1752.299
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 2228224, 100, 1000, 1279.055, 1.279, 1.264, 1.296, 1742.086, 1762.702
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 2359296, 100, 1000, 1355.160, 1.355, 1.339, 1.394, 1740.972, 1761.973
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 2490368, 100, 1000, 1426.283, 1.426, 1.412, 1.460, 1746.054, 1763.637
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 2621440, 100, 1000, 1500.985, 1.501, 1.483, 1.534, 1746.480, 1767.613
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 2752512, 100, 1000, 1568.831, 1.569, 1.551, 1.602, 1754.499, 1774.404
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 2883584, 100, 1000, 1633.285, 1.633, 1.618, 1.667, 1765.512, 1781.750
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3014656, 100, 1000, 1717.208, 1.717, 1.702, 1.753, 1755.557, 1771.260
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3145728, 100, 1000, 1786.412, 1.786, 1.766, 1.819, 1760.920, 1781.677
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3276800, 100, 1000, 1865.227, 1.865, 1.849, 1.898, 1756.784, 1771.872
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3407872, 100, 1000, 1930.164, 1.930, 1.912, 1.974, 1765.587, 1782.032
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3538944, 100, 1000, 2008.126, 2.008, 1.990, 2.044, 1762.312, 1777.921
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3670016, 100, 1000, 2081.271, 2.081, 2.065, 2.461, 1763.354, 1777.585
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3801088, 100, 1000, 2159.154, 2.159, 2.137, 2.781, 1760.453, 1778.603
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 3932160, 100, 1000, 2228.265, 2.228, 2.207, 2.704, 1764.673, 1782.032
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4063232, 100, 1000, 2313.167, 2.313, 2.284, 2.800, 1756.566, 1779.372
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4194304, 100, 1000, 2378.271, 2.378, 2.357, 2.861, 1763.594, 1779.491
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4325376, 100, 1000, 2440.542, 2.441, 2.423, 2.818, 1772.301, 1785.433
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4456448, 100, 1000, 2522.773, 2.523, 2.500, 2.953, 1766.488, 1782.579
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4587520, 100, 1000, 2591.409, 2.591, 2.570, 3.282, 1770.280, 1784.861
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4718592, 100, 1000, 2666.282, 2.666, 2.644, 3.013, 1769.727, 1784.728
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4849664, 100, 1000, 2732.678, 2.733, 2.711, 3.111, 1774.693, 1788.562
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 4980736, 100, 1000, 2811.500, 2.811, 2.783, 3.203, 1771.559, 1789.695
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 5111808, 100, 1000, 2881.055, 2.881, 2.856, 3.362, 1774.283, 1790.049
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 5242880, 100, 1000, 2958.542, 2.959, 2.929, 3.465, 1772.116, 1790.210
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 5373952, 100, 1000, 3023.746, 3.024, 3.000, 3.403, 1777.250, 1791.126
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 5505024, 100, 1000, 3105.817, 3.106, 3.076, 3.554, 1772.488, 1789.391
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 5636096, 100, 1000, 3174.015, 3.174, 3.149, 3.695, 1775.699, 1789.573
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 5767168, 100, 1000, 3236.337, 3.236, 3.208, 3.611, 1782.005, 1797.530
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 5898240, 100, 1000, 3319.511, 3.320, 3.293, 3.935, 1776.840, 1791.254
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6029312, 100, 1000, 3386.548, 3.387, 3.355, 3.942, 1780.371, 1797.245
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6160384, 100, 1000, 3465.190, 3.465, 3.445, 3.932, 1777.791, 1788.464
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6291456, 100, 1000, 3533.940, 3.534, 3.510, 4.219, 1780.295, 1792.495
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6422528, 100, 1000, 3611.257, 3.611, 3.587, 4.068, 1778.474, 1790.322
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6553600, 100, 1000, 3677.916, 3.678, 3.647, 4.461, 1781.879, 1796.743
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6684672, 100, 1000, 3755.175, 3.755, 3.724, 4.349, 1780.122, 1794.901
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6815744, 100, 1000, 3824.421, 3.824, 3.798, 4.332, 1782.163, 1794.554
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 6946816, 100, 1000, 3899.848, 3.900, 3.872, 4.291, 1781.304, 1794.205
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7077888, 100, 1000, 3967.871, 3.968, 3.944, 4.685, 1783.800, 1794.524
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7208960, 100, 1000, 4031.475, 4.031, 4.005, 4.428, 1788.169, 1800.051
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7340032, 100, 1000, 4117.537, 4.118, 4.085, 4.808, 1782.627, 1796.843
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7471104, 100, 1000, 4186.958, 4.187, 4.160, 4.743, 1784.375, 1795.925
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7602176, 100, 1000, 4263.533, 4.264, 4.234, 4.816, 1783.070, 1795.568
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7733248, 100, 1000, 4330.498, 4.330, 4.303, 4.852, 1785.764, 1796.999
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7864320, 100, 1000, 4411.305, 4.411, 4.380, 5.131, 1782.765, 1795.323
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 7995392, 100, 1000, 4480.015, 4.480, 4.449, 5.079, 1784.680, 1797.073
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 8126464, 100, 1000, 4553.000, 4.553, 4.526, 5.130, 1784.859, 1795.475
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 8257536, 100, 1000, 4621.935, 4.622, 4.593, 5.118, 1786.597, 1797.906
NVIDIA GeForce RTX 2060, 30, 1.15.0.0, uint64_t, 8388608, 100, 1000, 4703.969, 4.704, 4.667, 5.628, 1783.304, 1797.280

@allanmac
Copy link
Author

$ ./sort_cub_32 1; ./sort_cub_64 1
Quadro K2200 ( 5)
Device, Multiprocessors, CUB, Type, Keys, Warmup, Trials, Total Msecs, Avg. Msecs, Min Msecs, Max Msecs, Avg. Mkeys/s, Max. Mkeys/s
Quadro K2200, 5, 1.15.0.0, uint32_t, 131072, 100, 1000, 378.889, 0.379, 0.362, 0.816, 345.938, 361.710
Quadro K2200, 5, 1.15.0.0, uint32_t, 262144, 100, 1000, 639.755, 0.640, 0.634, 0.652, 409.757, 413.362
Quadro K2200, 5, 1.15.0.0, uint32_t, 393216, 100, 1000, 884.612, 0.885, 0.877, 0.983, 444.507, 448.222
Quadro K2200, 5, 1.15.0.0, uint32_t, 524288, 100, 1000, 1126.159, 1.126, 1.117, 1.137, 465.554, 469.348
Quadro K2200, 5, 1.15.0.0, uint32_t, 655360, 100, 1000, 1359.210, 1.359, 1.343, 1.378, 482.162, 488.107
Quadro K2200, 5, 1.15.0.0, uint32_t, 786432, 100, 1000, 1605.372, 1.605, 1.587, 1.622, 489.875, 495.614
Quadro K2200, 5, 1.15.0.0, uint32_t, 917504, 100, 1000, 1825.703, 1.826, 1.810, 1.843, 502.548, 506.949
Quadro K2200, 5, 1.15.0.0, uint32_t, 1048576, 100, 1000, 2075.836, 2.076, 2.059, 2.102, 505.134, 509.286
Quadro K2200, 5, 1.15.0.0, uint32_t, 1179648, 100, 1000, 2308.884, 2.309, 2.292, 2.412, 510.917, 514.702
Quadro K2200, 5, 1.15.0.0, uint32_t, 1310720, 100, 1000, 2528.077, 2.528, 2.513, 2.557, 518.465, 521.478
Quadro K2200, 5, 1.15.0.0, uint32_t, 1441792, 100, 1000, 2763.492, 2.763, 2.738, 2.796, 521.728, 526.515
Quadro K2200, 5, 1.15.0.0, uint32_t, 1572864, 100, 1000, 3001.023, 3.001, 2.978, 3.023, 524.109, 528.113
Quadro K2200, 5, 1.15.0.0, uint32_t, 1703936, 100, 1000, 3210.681, 3.211, 3.188, 3.240, 530.709, 534.458
Quadro K2200, 5, 1.15.0.0, uint32_t, 1835008, 100, 1000, 3454.231, 3.454, 3.433, 3.551, 531.235, 534.571
Quadro K2200, 5, 1.15.0.0, uint32_t, 1966080, 100, 1000, 3685.278, 3.685, 3.666, 3.775, 533.496, 536.285
Quadro K2200, 5, 1.15.0.0, uint32_t, 2097152, 100, 1000, 3910.639, 3.911, 3.887, 3.938, 536.268, 539.489
Quadro K2200, 5, 1.15.0.0, uint32_t, 2228224, 100, 1000, 4140.514, 4.141, 4.113, 4.228, 538.152, 541.694
Quadro K2200, 5, 1.15.0.0, uint32_t, 2359296, 100, 1000, 4371.611, 4.372, 4.341, 4.406, 539.686, 543.436
Quadro K2200, 5, 1.15.0.0, uint32_t, 2490368, 100, 1000, 4590.881, 4.591, 4.570, 4.619, 542.460, 544.994
Quadro K2200, 5, 1.15.0.0, uint32_t, 2621440, 100, 1000, 4823.680, 4.824, 4.794, 4.856, 543.452, 546.848
Quadro K2200, 5, 1.15.0.0, uint32_t, 2752512, 100, 1000, 5038.208, 5.038, 5.016, 5.127, 546.328, 548.788
Quadro K2200, 5, 1.15.0.0, uint32_t, 2883584, 100, 1000, 5297.167, 5.297, 5.259, 5.340, 544.363, 548.287
Quadro K2200, 5, 1.15.0.0, uint32_t, 3014656, 100, 1000, 5490.328, 5.490, 5.470, 5.520, 549.085, 551.114
Quadro K2200, 5, 1.15.0.0, uint32_t, 3145728, 100, 1000, 5735.433, 5.735, 5.700, 6.014, 548.473, 551.839
Quadro K2200, 5, 1.15.0.0, uint32_t, 3276800, 100, 1000, 5978.821, 5.979, 5.948, 6.057, 548.068, 550.869
Quadro K2200, 5, 1.15.0.0, uint32_t, 3407872, 100, 1000, 6187.892, 6.188, 6.159, 6.232, 550.732, 553.350
Quadro K2200, 5, 1.15.0.0, uint32_t, 3538944, 100, 1000, 6416.128, 6.416, 6.388, 6.508, 551.570, 553.988
Quadro K2200, 5, 1.15.0.0, uint32_t, 3670016, 100, 1000, 6651.636, 6.652, 6.618, 6.718, 551.746, 554.579
Quadro K2200, 5, 1.15.0.0, uint32_t, 3801088, 100, 1000, 6876.844, 6.877, 6.835, 6.923, 552.737, 556.126
Quadro K2200, 5, 1.15.0.0, uint32_t, 3932160, 100, 1000, 7106.413, 7.106, 7.069, 7.207, 553.326, 556.270
Quadro K2200, 5, 1.15.0.0, uint32_t, 4063232, 100, 1000, 7342.113, 7.342, 7.308, 7.426, 553.415, 555.998
Quadro K2200, 5, 1.15.0.0, uint32_t, 4194304, 100, 1000, 7564.409, 7.564, 7.522, 7.659, 554.479, 557.635
Quadro K2200, 5, 1.15.0.0, uint32_t, 4325376, 100, 1000, 7788.837, 7.789, 7.749, 7.867, 555.330, 558.220
Quadro K2200, 5, 1.15.0.0, uint32_t, 4456448, 100, 1000, 8011.906, 8.012, 7.977, 8.094, 556.228, 558.629
Quadro K2200, 5, 1.15.0.0, uint32_t, 4587520, 100, 1000, 8256.665, 8.257, 8.208, 8.319, 555.614, 558.939
Quadro K2200, 5, 1.15.0.0, uint32_t, 4718592, 100, 1000, 8482.693, 8.483, 8.435, 8.856, 556.261, 559.382
Quadro K2200, 5, 1.15.0.0, uint32_t, 4849664, 100, 1000, 8721.871, 8.722, 8.679, 8.821, 556.035, 558.785
Quadro K2200, 5, 1.15.0.0, uint32_t, 4980736, 100, 1000, 8932.444, 8.932, 8.890, 8.987, 557.601, 560.290
Quadro K2200, 5, 1.15.0.0, uint32_t, 5111808, 100, 1000, 9165.006, 9.165, 9.124, 9.288, 557.753, 560.234
Quadro K2200, 5, 1.15.0.0, uint32_t, 5242880, 100, 1000, 9410.310, 9.410, 9.358, 9.489, 557.142, 560.232
Quadro K2200, 5, 1.15.0.0, uint32_t, 5373952, 100, 1000, 9630.553, 9.631, 9.583, 9.752, 558.011, 560.774
Quadro K2200, 5, 1.15.0.0, uint32_t, 5505024, 100, 1000, 9835.416, 9.835, 9.795, 9.908, 559.714, 562.036
Quadro K2200, 5, 1.15.0.0, uint32_t, 5636096, 100, 1000, 10082.211, 10.082, 10.028, 10.211, 559.014, 562.022
Quadro K2200, 5, 1.15.0.0, uint32_t, 5767168, 100, 1000, 10319.021, 10.319, 10.267, 10.411, 558.887, 561.708
Quadro K2200, 5, 1.15.0.0, uint32_t, 5898240, 100, 1000, 10538.291, 10.538, 10.482, 10.639, 559.696, 562.691
Quadro K2200, 5, 1.15.0.0, uint32_t, 6029312, 100, 1000, 10754.114, 10.754, 10.711, 10.815, 560.652, 562.920
Quadro K2200, 5, 1.15.0.0, uint32_t, 6160384, 100, 1000, 10990.657, 10.991, 10.938, 11.069, 560.511, 563.212
Quadro K2200, 5, 1.15.0.0, uint32_t, 6291456, 100, 1000, 11229.071, 11.229, 11.168, 11.324, 560.283, 563.327
Quadro K2200, 5, 1.15.0.0, uint32_t, 6422528, 100, 1000, 11433.194, 11.433, 11.387, 11.559, 561.744, 564.027
Quadro K2200, 5, 1.15.0.0, uint32_t, 6553600, 100, 1000, 11677.139, 11.677, 11.623, 11.760, 561.233, 563.867
Quadro K2200, 5, 1.15.0.0, uint32_t, 6684672, 100, 1000, 11889.778, 11.890, 11.836, 12.001, 562.220, 564.776
Quadro K2200, 5, 1.15.0.0, uint32_t, 6815744, 100, 1000, 12138.031, 12.138, 12.073, 12.231, 561.520, 564.536
Quadro K2200, 5, 1.15.0.0, uint32_t, 6946816, 100, 1000, 12357.925, 12.358, 12.296, 12.462, 562.135, 564.946
Quadro K2200, 5, 1.15.0.0, uint32_t, 7077888, 100, 1000, 12582.987, 12.583, 12.536, 12.683, 562.497, 564.612
Quadro K2200, 5, 1.15.0.0, uint32_t, 7208960, 100, 1000, 12815.256, 12.815, 12.747, 12.891, 562.530, 565.553
Quadro K2200, 5, 1.15.0.0, uint32_t, 7340032, 100, 1000, 13049.531, 13.050, 12.987, 13.143, 562.475, 565.201
Quadro K2200, 5, 1.15.0.0, uint32_t, 7471104, 100, 1000, 13290.950, 13.291, 13.198, 13.422, 562.120, 566.098
Quadro K2200, 5, 1.15.0.0, uint32_t, 7602176, 100, 1000, 13484.765, 13.485, 13.435, 13.993, 563.760, 565.833
Quadro K2200, 5, 1.15.0.0, uint32_t, 7733248, 100, 1000, 13720.304, 13.720, 13.663, 13.829, 563.635, 566.011
Quadro K2200, 5, 1.15.0.0, uint32_t, 7864320, 100, 1000, 13948.978, 13.949, 13.884, 14.040, 563.792, 566.429
Quadro K2200, 5, 1.15.0.0, uint32_t, 7995392, 100, 1000, 14196.755, 14.197, 14.120, 14.284, 563.184, 566.232
Quadro K2200, 5, 1.15.0.0, uint32_t, 8126464, 100, 1000, 14404.189, 14.404, 14.350, 14.494, 564.174, 566.309
Quadro K2200, 5, 1.15.0.0, uint32_t, 8257536, 100, 1000, 14618.712, 14.619, 14.557, 14.724, 564.861, 567.273
Quadro K2200, 5, 1.15.0.0, uint32_t, 8388608, 100, 1000, 14892.316, 14.892, 14.817, 15.003, 563.284, 566.132
Quadro K2200 ( 5)
Device, Multiprocessors, CUB, Type, Keys, Warmup, Trials, Total Msecs, Avg. Msecs, Min Msecs, Max Msecs, Avg. Mkeys/s, Max. Mkeys/s
Quadro K2200, 5, 1.15.0.0, uint64_t, 131072, 100, 1000, 415.018, 0.415, 0.408, 0.437, 315.822, 321.608
Quadro K2200, 5, 1.15.0.0, uint64_t, 262144, 100, 1000, 843.392, 0.843, 0.833, 0.872, 310.821, 314.883
Quadro K2200, 5, 1.15.0.0, uint64_t, 393216, 100, 1000, 1335.067, 1.335, 1.320, 1.352, 294.529, 297.826
Quadro K2200, 5, 1.15.0.0, uint64_t, 524288, 100, 1000, 1804.428, 1.804, 1.789, 1.821, 290.556, 293.063
Quadro K2200, 5, 1.15.0.0, uint64_t, 655360, 100, 1000, 2254.571, 2.255, 2.239, 2.295, 290.681, 292.701
Quadro K2200, 5, 1.15.0.0, uint64_t, 786432, 100, 1000, 2704.927, 2.705, 2.685, 2.816, 290.741, 292.861
Quadro K2200, 5, 1.15.0.0, uint64_t, 917504, 100, 1000, 3172.602, 3.173, 3.154, 3.193, 289.196, 290.918
Quadro K2200, 5, 1.15.0.0, uint64_t, 1048576, 100, 1000, 3635.665, 3.636, 3.610, 3.732, 288.414, 290.491
Quadro K2200, 5, 1.15.0.0, uint64_t, 1179648, 100, 1000, 4085.520, 4.086, 4.058, 4.112, 288.739, 290.714
Quadro K2200, 5, 1.15.0.0, uint64_t, 1310720, 100, 1000, 4533.665, 4.534, 4.500, 4.563, 289.108, 291.255
Quadro K2200, 5, 1.15.0.0, uint64_t, 1441792, 100, 1000, 5006.416, 5.006, 4.977, 5.110, 287.989, 289.682
Quadro K2200, 5, 1.15.0.0, uint64_t, 1572864, 100, 1000, 5453.299, 5.453, 5.422, 5.551, 288.424, 290.094
Quadro K2200, 5, 1.15.0.0, uint64_t, 1703936, 100, 1000, 5874.647, 5.875, 5.843, 5.973, 290.049, 291.624
Quadro K2200, 5, 1.15.0.0, uint64_t, 1835008, 100, 1000, 6354.182, 6.354, 6.316, 6.464, 288.787, 290.554
Quadro K2200, 5, 1.15.0.0, uint64_t, 1966080, 100, 1000, 6790.401, 6.790, 6.747, 6.890, 289.538, 291.385
Quadro K2200, 5, 1.15.0.0, uint64_t, 2097152, 100, 1000, 7254.366, 7.254, 7.211, 7.323, 289.088, 290.832
Quadro K2200, 5, 1.15.0.0, uint64_t, 2228224, 100, 1000, 7701.411, 7.701, 7.658, 7.785, 289.327, 290.950
Quadro K2200, 5, 1.15.0.0, uint64_t, 2359296, 100, 1000, 8145.440, 8.145, 8.105, 8.248, 289.646, 291.096
Quadro K2200, 5, 1.15.0.0, uint64_t, 2490368, 100, 1000, 8589.478, 8.589, 8.549, 8.701, 289.932, 291.322
Quadro K2200, 5, 1.15.0.0, uint64_t, 2621440, 100, 1000, 9037.647, 9.038, 8.998, 9.141, 290.058, 291.335
Quadro K2200, 5, 1.15.0.0, uint64_t, 2752512, 100, 1000, 9496.182, 9.496, 9.459, 9.571, 289.855, 290.986
Quadro K2200, 5, 1.15.0.0, uint64_t, 2883584, 100, 1000, 9932.137, 9.932, 9.899, 10.030, 290.329, 291.301
Quadro K2200, 5, 1.15.0.0, uint64_t, 3014656, 100, 1000, 10391.448, 10.391, 10.358, 10.488, 290.109, 291.060
Quadro K2200, 5, 1.15.0.0, uint64_t, 3145728, 100, 1000, 10837.993, 10.838, 10.797, 10.939, 290.250, 291.357
Quadro K2200, 5, 1.15.0.0, uint64_t, 3276800, 100, 1000, 11251.473, 11.251, 11.207, 11.351, 291.233, 292.384
Quadro K2200, 5, 1.15.0.0, uint64_t, 3407872, 100, 1000, 11734.424, 11.734, 11.689, 11.836, 290.417, 291.557
Quadro K2200, 5, 1.15.0.0, uint64_t, 3538944, 100, 1000, 12165.749, 12.166, 12.117, 12.265, 290.894, 292.061
Quadro K2200, 5, 1.15.0.0, uint64_t, 3670016, 100, 1000, 12624.731, 12.625, 12.586, 12.720, 290.701, 291.595
Quadro K2200, 5, 1.15.0.0, uint64_t, 3801088, 100, 1000, 13056.549, 13.057, 13.006, 13.143, 291.125, 292.264
Quadro K2200, 5, 1.15.0.0, uint64_t, 3932160, 100, 1000, 13510.236, 13.510, 13.469, 13.609, 291.050, 291.941
Quadro K2200, 5, 1.15.0.0, uint64_t, 4063232, 100, 1000, 13949.322, 13.949, 13.902, 14.075, 291.285, 292.268
Quadro K2200, 5, 1.15.0.0, uint64_t, 4194304, 100, 1000, 14399.020, 14.399, 14.344, 14.501, 291.291, 292.404
Quadro K2200, 5, 1.15.0.0, uint64_t, 4325376, 100, 1000, 14818.629, 14.819, 14.758, 14.920, 291.888, 293.078
Quadro K2200, 5, 1.15.0.0, uint64_t, 4456448, 100, 1000, 15290.629, 15.291, 15.237, 15.387, 291.450, 292.478
Quadro K2200, 5, 1.15.0.0, uint64_t, 4587520, 100, 1000, 15726.806, 15.727, 15.673, 15.803, 291.701, 292.694
Quadro K2200, 5, 1.15.0.0, uint64_t, 4718592, 100, 1000, 16144.619, 16.145, 16.087, 16.308, 292.270, 293.319
Quadro K2200, 5, 1.15.0.0, uint64_t, 4849664, 100, 1000, 16594.859, 16.595, 16.547, 16.696, 292.239, 293.078
Quadro K2200, 5, 1.15.0.0, uint64_t, 4980736, 100, 1000, 17070.090, 17.070, 17.021, 17.189, 291.781, 292.624
Quadro K2200, 5, 1.15.0.0, uint64_t, 5111808, 100, 1000, 17478.402, 17.478, 17.417, 17.594, 292.464, 293.487
Quadro K2200, 5, 1.15.0.0, uint64_t, 5242880, 100, 1000, 17918.230, 17.918, 17.865, 18.062, 292.600, 293.480
Quadro K2200, 5, 1.15.0.0, uint64_t, 5373952, 100, 1000, 18382.045, 18.382, 18.326, 18.477, 292.348, 293.242
Quadro K2200, 5, 1.15.0.0, uint64_t, 5505024, 100, 1000, 18816.900, 18.817, 18.768, 18.909, 292.557, 293.324
Quadro K2200, 5, 1.15.0.0, uint64_t, 5636096, 100, 1000, 19279.232, 19.279, 19.223, 19.403, 292.340, 293.203
Quadro K2200, 5, 1.15.0.0, uint64_t, 5767168, 100, 1000, 19705.645, 19.706, 19.644, 19.786, 292.666, 293.584
Quadro K2200, 5, 1.15.0.0, uint64_t, 5898240, 100, 1000, 20190.789, 20.191, 20.121, 20.302, 292.125, 293.143
Quadro K2200, 5, 1.15.0.0, uint64_t, 6029312, 100, 1000, 20633.969, 20.634, 20.570, 20.743, 292.203, 293.109
Quadro K2200, 5, 1.15.0.0, uint64_t, 6160384, 100, 1000, 21051.604, 21.052, 20.995, 21.161, 292.633, 293.426
Quadro K2200, 5, 1.15.0.0, uint64_t, 6291456, 100, 1000, 21507.178, 21.507, 21.433, 21.609, 292.528, 293.543
Quadro K2200, 5, 1.15.0.0, uint64_t, 6422528, 100, 1000, 21953.455, 21.953, 21.870, 22.067, 292.552, 293.663
Quadro K2200, 5, 1.15.0.0, uint64_t, 6553600, 100, 1000, 22367.307, 22.367, 22.303, 22.478, 292.999, 293.842
Quadro K2200, 5, 1.15.0.0, uint64_t, 6684672, 100, 1000, 22790.842, 22.791, 22.717, 22.883, 293.305, 294.260
Quadro K2200, 5, 1.15.0.0, uint64_t, 6815744, 100, 1000, 23312.830, 23.313, 23.240, 23.454, 292.360, 293.280
Quadro K2200, 5, 1.15.0.0, uint64_t, 6946816, 100, 1000, 23693.637, 23.694, 23.623, 23.809, 293.193, 294.076
Quadro K2200, 5, 1.15.0.0, uint64_t, 7077888, 100, 1000, 24154.064, 24.154, 24.083, 24.253, 293.031, 293.895
Quadro K2200, 5, 1.15.0.0, uint64_t, 7208960, 100, 1000, 24645.039, 24.645, 24.568, 24.741, 292.512, 293.425
Quadro K2200, 5, 1.15.0.0, uint64_t, 7340032, 100, 1000, 25043.445, 25.043, 24.979, 25.147, 293.092, 293.845
Quadro K2200, 5, 1.15.0.0, uint64_t, 7471104, 100, 1000, 25509.453, 25.509, 25.421, 25.628, 292.876, 293.897
Quadro K2200, 5, 1.15.0.0, uint64_t, 7602176, 100, 1000, 25917.277, 25.917, 25.847, 26.026, 293.325, 294.125
Quadro K2200, 5, 1.15.0.0, uint64_t, 7733248, 100, 1000, 26376.803, 26.377, 26.303, 26.497, 293.184, 294.004
Quadro K2200, 5, 1.15.0.0, uint64_t, 7864320, 100, 1000, 26846.506, 26.847, 26.761, 26.962, 292.936, 293.870
Quadro K2200, 5, 1.15.0.0, uint64_t, 7995392, 100, 1000, 27233.934, 27.234, 27.163, 27.355, 293.582, 294.346
Quadro K2200, 5, 1.15.0.0, uint64_t, 8126464, 100, 1000, 27720.045, 27.720, 27.634, 27.808, 293.162, 294.071
Quadro K2200, 5, 1.15.0.0, uint64_t, 8257536, 100, 1000, 28168.227, 28.168, 28.085, 28.304, 293.151, 294.022
Quadro K2200, 5, 1.15.0.0, uint64_t, 8388608, 100, 1000, 28562.717, 28.563, 28.487, 28.712, 293.691, 294.469

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment