Skip to content

Instantly share code, notes, and snippets.

View killeent's full-sized avatar

Trevor Killeen killeent

View GitHub Profile
#include "TH.h"
#include "THC.h"
#include <cstdio>
#include "sys/time.h"
#include <vector>
#include "cuda_profiler_api.h"
const long loops = 10;
void compare(THIntTensor *input, THIntTensor *mode, THLongTensor *indices,
local runtests = false
if not cutorch then
require 'cutorch'
runtests = true
end
local test = {}
local minsize = 5
local maxsize = 10
local minvalue = 2
#ifndef THC_GENERIC_FILE
#define THC_GENERIC_FILE "generic/THCTensorMode.cu"
#else
THC_API void THCTensor_(calculateMode)(THCState *state,
THCTensor *values,
THCudaLongTensor *indices,
THCTensor *input,
THCudaLongStorage *sortBuffer,
int dimension,
#ifndef THC_TENSOR_MODE_CUH
#define THC_TENSOR_MODE_CUH
#include "THCNumerics.cuh"
#include "THCSortUtils.cuh"
struct ThrustHalfLess
{
__host__ __device__ inline bool operator()(const half& lhs, const half& rhs) {
return THCNumerics<half>::lt(lhs, rhs);
Sum of 10 trials used to generate results
----------------------------------------------------------------
CatArray for 2 size 1 1D Tensors took 0.00029397010803223 seconds.
CatArray for 4 size 1 1D Tensors took 0.00024986267089844 seconds.
CatArray for 8 size 1 1D Tensors took 0.00025296211242676 seconds.
CatArray for 32 size 1 1D Tensors took 0.00030803680419922 seconds.
CatArray for 128 size 1 1D Tensors took 0.00056195259094238 seconds.
CatArray for 512 size 1 1D Tensors took 0.0013229846954346 seconds.
CatArray for 1024 size 1 1D Tensors took 0.0022039413452148 seconds.
CatArray for 2048 size 1 1D Tensors took 0.004547119140625 seconds.
----------------------------------------------------------------
Soumith's Benchmarks (The times are the sum of 5 trials)
----------------------------------------------------------------
ms: 0.066048
ms: 0.028448
ms: 0.027008
ms: 0.026304
ms: 0.026624
CatArray for 32 1x600 Tensors along dim=1 took 7.6389312744141e-05 seconds
ms: 0.041856
Sum of 10 trials used to generate results
----------------------------------------------------------------
CatArray for 2 size 1 1D Tensors took 0.0005650520324707 seconds.
CatArray for 4 size 1 1D Tensors took 0.00078177452087402 seconds.
CatArray for 8 size 1 1D Tensors took 0.0015158653259277 seconds.
CatArray for 32 size 1 1D Tensors took 0.0047669410705566 seconds.
CatArray for 128 size 1 1D Tensors took 0.021991968154907 seconds.
CatArray for 512 size 1 1D Tensors took 0.079319953918457 seconds.
CatArray for 1024 size 1 1D Tensors took 0.15872097015381 seconds.
CatArray for 2048 size 1 1D Tensors took 0.30642604827881 seconds.
----------------------------------------------------------------
Soumith's Benchmarks (The times are the sum of 5 trials)
----------------------------------------------------------------
ms: 0.280992
ms: 0.218592
ms: 0.218208
ms: 0.253888
ms: 0.243072
CatArray for 32 1x600 Tensors along dim=1 took 0.00028080940246582 seconds
ms: 0.898176
Sum of 10 trials used to generate results
----------------------------------------------------------------
CatArray for 2 size 1 1D Tensors took 0.00030994415283203 seconds.
CatArray for 4 size 1 1D Tensors took 0.00022506713867188 seconds.
CatArray for 8 size 1 1D Tensors took 0.00023889541625977 seconds.
CatArray for 32 size 1 1D Tensors took 0.00027704238891602 seconds.
CatArray for 128 size 1 1D Tensors took 0.00044894218444824 seconds.
CatArray for 512 size 1 1D Tensors took 0.0012199878692627 seconds.
CatArray for 1024 size 1 1D Tensors took 0.0020511150360107 seconds.
CatArray for 2048 size 1 1D Tensors took 0.0042030811309814 seconds.
Sum of 10 trials used to generate results
----------------------------------------------------------------
CatArray for 2 size 1 1D Tensors took 0.00037097930908203 seconds.
CatArray for 4 size 1 1D Tensors took 0.00036001205444336 seconds.
CatArray for 8 size 1 1D Tensors took 0.00064897537231445 seconds.
CatArray for 32 size 1 1D Tensors took 0.0023789405822754 seconds.
CatArray for 128 size 1 1D Tensors took 0.0089218616485596 seconds.
CatArray for 512 size 1 1D Tensors took 0.036385059356689 seconds.
CatArray for 1024 size 1 1D Tensors took 0.075536012649536 seconds.
CatArray for 2048 size 1 1D Tensors took 0.14832401275635 seconds.