Last active
December 5, 2019 10:21
-
-
Save atinfinity/49ff9dbf0bb617331cc07d35cd8a5e66 to your computer and use it in GitHub Desktop.
GpuMatのcudaMallocPitchが遅くなる再現コード
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <opencv2/core.hpp>
#include <opencv2/core/cuda.hpp>
#include <cuda_runtime.h>
#include <device_launch_parameters.h>
#include <cstdlib>
#include <iostream>
int main(int argc, const char * argv[]) | |
{ | |
cudaFree(0); // dummy call | |
const size_t width = 256; | |
const size_t height = 256; | |
const size_t elemSize = 12; | |
for (int i = 0; i < 5; i++) | |
{ | |
#if 1 | |
cv::cuda::GpuMat d_img(cv::Size(width, height), CV_32FC3); // cudaMallocPitch is slow(only first call) | |
#else | |
size_t step = 0; | |
unsigned char *data = NULL; | |
cudaMallocPitch(&data, &step, elemSize * width, height); | |
cudaFree(data); | |
#endif | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
検証環境
nvprofログ
GpuMat
cudaMallocPitch直呼び
cudaMallocPitchの引数は、GpuMat内部で呼ばれているものと同じ値に合わせている。
GpuMatインスタンス生成時に呼ばれるcudaMallocPitchだけが遅くなる理由は、現時点では不明である。