Skip to content

Instantly share code, notes, and snippets.

@killeent
Created March 16, 2017 15:24
Show Gist options
  • Save killeent/75a77a393447b0b4e89b35a94e75d216 to your computer and use it in GitHub Desktop.
Save killeent/75a77a393447b0b4e89b35a94e75d216 to your computer and use it in GitHub Desktop.
#include "TH.h"
#include "THC.h"
#include "cuda_profiler_api.h"
#include "sys/time.h"
#include <cstdio>
#include <cstdlib>
#include <tuple>
#include <utility>
#include <vector>
// Number of timed iterations per measurement; reported times are averaged over this count.
constexpr long loops = 10;
// Returns the wall-clock time from `start` to `end` in microseconds.
// Both fields of the difference are used: tv_usec alone only holds the
// sub-second remainder, so it silently drops whole seconds.
static long elapsedMicros(const struct timeval &start, const struct timeval &end) {
  struct timeval diff;
  timersub(&end, &start, &diff);
  return static_cast<long>(diff.tv_sec) * 1000000L + static_cast<long>(diff.tv_usec);
}

// Times THIntTensor_mode against THCudaIntTensor_mode for the same `dimension`.
//
// The TH call is timed `loops` times. What happens next depends on the
// NVVP_RUN environment variable:
//   - set:   the THC call is executed once between cudaProfilerStart() and
//            cudaProfilerStop(); nothing is printed.
//   - unset: the THC call is timed `loops` times (each iteration includes a
//            cudaDeviceSynchronize()) and the average microseconds per call
//            for both variants are printed on one line.
//
// Parameters:
//   input, mode, indices    - TH input tensor and the two result tensors.
//   state                   - THC state passed to every THC call.
//   cinput, cmode, cindices - THC counterparts of the tensors above.
//   dimension               - dimension argument forwarded to both mode calls.
void compare(THIntTensor *input, THIntTensor *mode, THLongTensor *indices,
             THCState *state, THCudaIntTensor *cinput, THCudaIntTensor *cmode,
             THCudaLongTensor *cindices, int dimension) {
  struct timeval start, end;
  long sum = 0, csum = 0;

  for (int i = 0; i < loops; ++i) {
    gettimeofday(&start, NULL);
    THIntTensor_mode(mode, indices, input, dimension);
    gettimeofday(&end, NULL);
    sum += elapsedMicros(start, end);
  }

  if (getenv("NVVP_RUN")) {
    cudaProfilerStart();
    THCudaIntTensor_mode(state, cmode, cindices, cinput, dimension);
    // Wait for the device to finish before stopping the profiler so that
    // work still in flight is part of the captured range.
    cudaDeviceSynchronize();
    cudaProfilerStop();
  } else {
    for (int i = 0; i < loops; ++i) {
      gettimeofday(&start, NULL);
      THCudaIntTensor_mode(state, cmode, cindices, cinput, dimension);
      // Synchronize inside the timed region so the measurement covers the
      // device work, not just the time to enqueue it.
      cudaDeviceSynchronize();
      gettimeofday(&end, NULL);
      csum += elapsedMicros(start, end);
    }
    printf("%ld usec (TH), %ld usec (THC)\n", sum / loops, csum / loops);
  }
}
// Runs the TH vs. THC mode comparison on a 1D int tensor with d0 elements.
//
// Allocates the TH and THC input tensors plus empty result tensors, prints a
// label for the run, delegates to compare() along dimension 0, and frees
// every tensor it allocated.
// NOTE(review): the input tensors are not written to in this function, so
// their contents are whatever the allocation left there.
void compare1D(THCState *state, long d0) {
  // TH tensors.
  THIntTensor *hostInput = THIntTensor_newWithSize1d(d0);
  THIntTensor *hostMode = THIntTensor_new();
  THLongTensor *hostIndices = THLongTensor_new();

  // THC tensors.
  THCudaIntTensor *devInput = THCudaIntTensor_newWithSize1d(state, d0);
  THCudaIntTensor *devMode = THCudaIntTensor_new(state);
  THCudaLongTensor *devIndices = THCudaLongTensor_new(state);

  printf("Testing 1D Tensor of size %ld: ", d0);

  // A 1D tensor only has dimension 0 to reduce over.
  const int onlyDimension = 0;
  compare(hostInput, hostMode, hostIndices,
          state, devInput, devMode, devIndices,
          onlyDimension);

  THIntTensor_free(hostInput);
  THIntTensor_free(hostMode);
  THLongTensor_free(hostIndices);
  THCudaIntTensor_free(state, devInput);
  THCudaIntTensor_free(state, devMode);
  THCudaLongTensor_free(state, devIndices);
}
// Runs the TH vs. THC mode comparison on a d0 x d1 int tensor along
// `dimension`.
//
// Allocates the TH and THC input tensors plus empty result tensors, prints a
// label for the run, delegates to compare(), and frees every tensor it
// allocated.
// NOTE(review): the input tensors are not written to in this function, so
// their contents are whatever the allocation left there.
void compare2D(THCState *state, long d0, long d1, int dimension) {
  // TH tensors.
  THIntTensor *hostInput = THIntTensor_newWithSize2d(d0, d1);
  THIntTensor *hostMode = THIntTensor_new();
  THLongTensor *hostIndices = THLongTensor_new();

  // THC tensors.
  THCudaIntTensor *devInput = THCudaIntTensor_newWithSize2d(state, d0, d1);
  THCudaIntTensor *devMode = THCudaIntTensor_new(state);
  THCudaLongTensor *devIndices = THCudaLongTensor_new(state);

  printf("Testing 2D Tensor of size (%ld, %ld) along dimension %d: ", d0, d1, dimension);

  compare(hostInput, hostMode, hostIndices,
          state, devInput, devMode, devIndices,
          dimension);

  THIntTensor_free(hostInput);
  THIntTensor_free(hostMode);
  THLongTensor_free(hostIndices);
  THCudaIntTensor_free(state, devInput);
  THCudaIntTensor_free(state, devMode);
  THCudaLongTensor_free(state, devIndices);
}
// Runs the TH vs. THC mode comparison on a d0 x d1 x d2 int tensor along
// `dimension`.
//
// Allocates the TH and THC input tensors plus empty result tensors, prints a
// label for the run, delegates to compare(), and frees every tensor it
// allocated.
// NOTE(review): the input tensors are not written to in this function, so
// their contents are whatever the allocation left there.
void compare3D(THCState *state, long d0, long d1, long d2, int dimension) {
  // TH tensors.
  THIntTensor *hostInput = THIntTensor_newWithSize3d(d0, d1, d2);
  THIntTensor *hostMode = THIntTensor_new();
  THLongTensor *hostIndices = THLongTensor_new();

  // THC tensors.
  THCudaIntTensor *devInput = THCudaIntTensor_newWithSize3d(state, d0, d1, d2);
  THCudaIntTensor *devMode = THCudaIntTensor_new(state);
  THCudaLongTensor *devIndices = THCudaLongTensor_new(state);

  printf("Testing 3D Tensor of size (%ld, %ld, %ld) along dimension %d: ", d0, d1, d2, dimension);

  compare(hostInput, hostMode, hostIndices,
          state, devInput, devMode, devIndices,
          dimension);

  THIntTensor_free(hostInput);
  THIntTensor_free(hostMode);
  THLongTensor_free(hostIndices);
  THCudaIntTensor_free(state, devInput);
  THCudaIntTensor_free(state, devMode);
  THCudaLongTensor_free(state, devIndices);
}
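// TODO: presize the result tensors (mode/indices and their CUDA counterparts)
// before timing; they are currently created empty.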
// Benchmark driver.
//
// Initializes THC, then runs the TH vs. THC mode comparison over a fixed list
// of 1D, 2D and 3D tensor shapes. When the NVVP_RUN environment variable is
// set, only a single 1D tensor of size 16 is run and the 2D/3D sweeps are
// skipped. Shuts THC down and resets the device before returning 0.
int main() {
  THCState *state = THCState_alloc();
  THCudaInit(state);
  printf("Testing average duration for %ld loops\n", loops);

  // Query the environment once; the same flag selects both the 1D size list
  // and whether the 2D/3D sweeps run at all.
  const bool profiling = getenv("NVVP_RUN") != NULL;

  std::vector<int> oned = {8, 16, 32, 64, 128, 256, 512, 1024, 2048};
  if (profiling) {
    oned = {16};
  }
  for (const auto& size : oned) {
    compare1D(state, size);
  }

  if (!profiling) {
    // Let make_pair / make_tuple deduce their types: explicit template
    // arguments turn the parameters into rvalue references and break as soon
    // as an lvalue is passed.
    const std::vector<std::pair<int, int>> twod = {
      std::make_pair(1, 32),
      std::make_pair(1, 64),
      std::make_pair(1, 128),
      std::make_pair(1, 1024),
      std::make_pair(16, 16),
      std::make_pair(32, 32),
      std::make_pair(128, 128),
      std::make_pair(256, 256),
      std::make_pair(512, 512),
      std::make_pair(1024, 1024),
      std::make_pair(16, 128),
      std::make_pair(32, 128),
      std::make_pair(32, 256),
    };
    for (const auto& sizes : twod) {
      // Same pair of extents, once reduced along the last dimension and once
      // (transposed) along the first, so the reduced extent is always
      // sizes.second.
      compare2D(state, sizes.first, sizes.second, 1);
      compare2D(state, sizes.second, sizes.first, 0);
    }

    const std::vector<std::tuple<int, int, int>> threed = {
      std::make_tuple(1, 1, 128),
      std::make_tuple(1, 32, 128),
      std::make_tuple(1, 32, 1024),
      std::make_tuple(16, 16, 128),
      std::make_tuple(16, 128, 128),
      std::make_tuple(32, 128, 128),
      std::make_tuple(16, 256, 256),
      std::make_tuple(32, 256, 256),
      std::make_tuple(128, 128, 128),
      std::make_tuple(128, 256, 256),
    };
    for (const auto& sizes : threed) {
      const int d0 = std::get<0>(sizes);
      const int d1 = std::get<1>(sizes);
      const int d2 = std::get<2>(sizes);
      // Permute the extents so the reduced dimension always has extent d2.
      compare3D(state, d0, d1, d2, 2);
      compare3D(state, d0, d2, d1, 1);
      compare3D(state, d2, d1, d0, 0);
    }
  }

  THCudaShutdown(state);
  THCState_free(state);
  cudaDeviceReset();
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment