Created
March 16, 2017 15:24
-
-
Save killeent/75a77a393447b0b4e89b35a94e75d216 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include "TH.h"
#include "THC.h"
#include "cuda_profiler_api.h"
#include "sys/time.h"

#include <cstdio>
#include <cstdlib>
#include <tuple>
#include <utility>
#include <vector>
// Number of timed repetitions per benchmark case; the reported figures are
// the accumulated time divided by this count.
constexpr long loops = 10;
/**
 * Returns the number of microseconds between two gettimeofday() samples.
 *
 * Both the whole-second and the microsecond components of the difference are
 * counted, so intervals of one second or longer are reported correctly.
 */
static long elapsedUsec(const struct timeval &start, const struct timeval &end) {
  struct timeval diff;
  timersub(&end, &start, &diff);
  return static_cast<long>(diff.tv_sec) * 1000000L + static_cast<long>(diff.tv_usec);
}

/**
 * Times THIntTensor_mode against THCudaIntTensor_mode on the given operands
 * and prints the average wall-clock duration of each over `loops` runs.
 *
 * @param input      TH tensor passed as the source to THIntTensor_mode
 * @param mode       TH tensor receiving the first output of THIntTensor_mode
 * @param indices    TH tensor receiving the second output of THIntTensor_mode
 * @param state      THC state handed to THCudaIntTensor_mode
 * @param cinput     THC tensor passed as the source to THCudaIntTensor_mode
 * @param cmode      THC tensor receiving the first output
 * @param cindices   THC tensor receiving the second output
 * @param dimension  dimension argument forwarded to both mode calls
 *
 * If the NVVP_RUN environment variable is set, the THC call is executed once
 * between cudaProfilerStart()/cudaProfilerStop() and no timings are printed
 * (presumably for runs under the NVIDIA Visual Profiler -- confirm).
 *
 * Callers print a line prefix without a newline before calling this function;
 * both branches terminate that line.
 */
void compare(THIntTensor *input, THIntTensor *mode, THLongTensor *indices,
             THCState *state, THCudaIntTensor *cinput, THCudaIntTensor *cmode, THCudaLongTensor *cindices,
             int dimension) {
  struct timeval th_start, th_end;
  struct timeval thc_start, thc_end;
  long sum = 0, csum = 0;

  // TH timing loop (runs in both modes, as in the original benchmark).
  for (long i = 0; i < loops; ++i) {
    gettimeofday(&th_start, NULL);
    THIntTensor_mode(mode, indices, input, dimension);
    gettimeofday(&th_end, NULL);
    sum += elapsedUsec(th_start, th_end);
  }

  if (getenv("NVVP_RUN")) {
    cudaProfilerStart();
    THCudaIntTensor_mode(state, cmode, cindices, cinput, dimension);
    // Wait for all device work to finish *before* stopping the profiler so
    // that any asynchronously executing kernels fall inside the profiled range.
    cudaDeviceSynchronize();
    cudaProfilerStop();
    // Finish the line started by the caller; no timings in profiling mode.
    printf("profiled run (no timings)\n");
    return;
  }

  for (long i = 0; i < loops; ++i) {
    gettimeofday(&thc_start, NULL);
    THCudaIntTensor_mode(state, cmode, cindices, cinput, dimension);
    // Block until the device is idle so the measurement covers the full run.
    cudaDeviceSynchronize();
    gettimeofday(&thc_end, NULL);
    csum += elapsedUsec(thc_start, thc_end);
  }
  printf("%ld usec (TH), %ld usec (THC)\n", sum / loops, csum / loops);
}
/**
 * Runs the TH/THC mode comparison on a 1-D int tensor of length d0.
 *
 * Allocates the TH and THC operands, prints the case header, delegates the
 * timing to compare() along dimension 0, and releases every tensor again.
 *
 * NOTE(review): the input tensors are not filled anywhere in this function;
 * confirm whether newWithSize1d initialises the contents, otherwise the two
 * implementations are timed on different, arbitrary data.
 */
void compare1D(THCState *state, long d0) {
  // Host-side operands.
  THIntTensor *hostInput = THIntTensor_newWithSize1d(d0);
  THIntTensor *hostMode = THIntTensor_new();
  THLongTensor *hostIndices = THLongTensor_new();

  // Device-side operands.
  THCudaIntTensor *devInput = THCudaIntTensor_newWithSize1d(state, d0);
  THCudaIntTensor *devMode = THCudaIntTensor_new(state);
  THCudaLongTensor *devIndices = THCudaLongTensor_new(state);

  printf("Testing 1D Tensor of size %ld: ", d0);
  compare(hostInput, hostMode, hostIndices,
          state, devInput, devMode, devIndices,
          0);

  // Release in reverse order of allocation.
  THCudaLongTensor_free(state, devIndices);
  THCudaIntTensor_free(state, devMode);
  THCudaIntTensor_free(state, devInput);
  THLongTensor_free(hostIndices);
  THIntTensor_free(hostMode);
  THIntTensor_free(hostInput);
}
/**
 * Runs the TH/THC mode comparison on a 2-D int tensor of size (d0, d1).
 *
 * Allocates the TH and THC operands, prints the case header, delegates the
 * timing to compare() along `dimension`, and releases every tensor again.
 *
 * NOTE(review): the input tensors are not filled anywhere in this function;
 * confirm whether newWithSize2d initialises the contents, otherwise the two
 * implementations are timed on different, arbitrary data.
 */
void compare2D(THCState *state, long d0, long d1, int dimension) {
  // Host-side operands.
  THIntTensor *hostInput = THIntTensor_newWithSize2d(d0, d1);
  THIntTensor *hostMode = THIntTensor_new();
  THLongTensor *hostIndices = THLongTensor_new();

  // Device-side operands.
  THCudaIntTensor *devInput = THCudaIntTensor_newWithSize2d(state, d0, d1);
  THCudaIntTensor *devMode = THCudaIntTensor_new(state);
  THCudaLongTensor *devIndices = THCudaLongTensor_new(state);

  printf("Testing 2D Tensor of size (%ld, %ld) along dimension %d: ", d0, d1, dimension);
  compare(hostInput, hostMode, hostIndices,
          state, devInput, devMode, devIndices,
          dimension);

  // Release in reverse order of allocation.
  THCudaLongTensor_free(state, devIndices);
  THCudaIntTensor_free(state, devMode);
  THCudaIntTensor_free(state, devInput);
  THLongTensor_free(hostIndices);
  THIntTensor_free(hostMode);
  THIntTensor_free(hostInput);
}
/**
 * Runs the TH/THC mode comparison on a 3-D int tensor of size (d0, d1, d2).
 *
 * Allocates the TH and THC operands, prints the case header, delegates the
 * timing to compare() along `dimension`, and releases every tensor again.
 *
 * NOTE(review): the input tensors are not filled anywhere in this function;
 * confirm whether newWithSize3d initialises the contents, otherwise the two
 * implementations are timed on different, arbitrary data.
 */
void compare3D(THCState *state, long d0, long d1, long d2, int dimension) {
  // Host-side operands.
  THIntTensor *hostInput = THIntTensor_newWithSize3d(d0, d1, d2);
  THIntTensor *hostMode = THIntTensor_new();
  THLongTensor *hostIndices = THLongTensor_new();

  // Device-side operands.
  THCudaIntTensor *devInput = THCudaIntTensor_newWithSize3d(state, d0, d1, d2);
  THCudaIntTensor *devMode = THCudaIntTensor_new(state);
  THCudaLongTensor *devIndices = THCudaLongTensor_new(state);

  printf("Testing 3D Tensor of size (%ld, %ld, %ld) along dimension %d: ", d0, d1, d2, dimension);
  compare(hostInput, hostMode, hostIndices,
          state, devInput, devMode, devIndices,
          dimension);

  // Release in reverse order of allocation.
  THCudaLongTensor_free(state, devIndices);
  THCudaIntTensor_free(state, devMode);
  THCudaIntTensor_free(state, devInput);
  THLongTensor_free(hostIndices);
  THIntTensor_free(hostMode);
  THIntTensor_free(hostInput);
}
// TODO: presize tensor
int main() { | |
THCState *state = THCState_alloc(); | |
THCudaInit(state); | |
printf("Testing average duration for %ld loops\n" , loops); | |
std::vector<int> oned = {8, 16, 32, 64, 128, 256, 512, 1024, 2048}; | |
if (getenv("NVVP_RUN")) { | |
oned = {16}; | |
} | |
for (const auto& size : oned) { | |
compare1D(state, size); | |
} | |
if (!getenv("NVVP_RUN")) { | |
std::vector<std::pair<int, int>> twod = { | |
std::make_pair<int, int>(1, 32), | |
std::make_pair<int, int>(1, 64), | |
std::make_pair<int, int>(1, 128), | |
std::make_pair<int, int>(1, 1024), | |
std::make_pair<int, int>(16, 16), | |
std::make_pair<int, int>(32, 32), | |
std::make_pair<int, int>(128, 128), | |
std::make_pair<int, int>(256, 256), | |
std::make_pair<int, int>(512, 512), | |
std::make_pair<int, int>(1024, 1024), | |
std::make_pair<int, int>(16, 128), | |
std::make_pair<int, int>(32, 128), | |
std::make_pair<int, int>(32, 256), | |
}; | |
for (const auto& sizes : twod) { | |
compare2D(state, sizes.first, sizes.second, 1); | |
compare2D(state, sizes.second, sizes.first, 0); | |
} | |
std::vector<std::tuple<int, int, int>> threed = { | |
std::make_tuple<int, int, int>(1, 1, 128), | |
std::make_tuple<int, int, int>(1, 32, 128), | |
std::make_tuple<int, int, int>(1, 32, 1024), | |
std::make_tuple<int, int, int>(16, 16, 128), | |
std::make_tuple<int, int, int>(16, 128, 128), | |
std::make_tuple<int, int, int>(32, 128, 128), | |
std::make_tuple<int, int, int>(16, 256, 256), | |
std::make_tuple<int, int, int>(32, 256, 256), | |
std::make_tuple<int, int, int>(128, 128, 128), | |
std::make_tuple<int, int, int>(128, 256, 256), | |
}; | |
for (const auto& sizes : threed) { | |
int d0 = std::get<0>(sizes); | |
int d1 = std::get<1>(sizes); | |
int d2 = std::get<2>(sizes); | |
compare3D(state, d0, d1, d2, 2); | |
compare3D(state, d0, d2, d1, 1); | |
compare3D(state, d2, d1, d0, 0); | |
} | |
} | |
THCudaShutdown(state); | |
THCState_free(state); | |
cudaDeviceReset(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment