#include "common.h" | |
#include "cudaUtility.h" | |
#include "mathFunctions.h" | |
#include "pluginImplement.h" | |
#include "tensorNet.h" | |
#include "loadImage.h" | |
#include "imageBuffer.h" | |
#include <chrono> | |
#include <thread> | |
#include <chrono> | |
const char* model = "model/pelee/pelee_deploy_iplugin.prototxt"; | |
const char* weight = "model/pelee/pelee_merged.caffemodel"; | |
const char* INPUT_BLOB_NAME = "data"; | |
const char* OUTPUT_BLOB_NAME = "detection_out"; | |
static const uint32_t BATCH_SIZE = 2; | |
//image buffer size = 10 | |
//dropFrame = false | |
ConsumerProducerQueue<cv::Mat> *imageBuffer = new ConsumerProducerQueue<cv::Mat>(10,false); | |
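// The queue above is the shared producer/consumer buffer between the capture
// thread and the inference loop. With a capacity of 10 and dropFrame = false
// the producer presumably blocks when the queue is full rather than dropping
// frames (the exact semantics depend on imageBuffer.h).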
class Timer {
public:
    void tic() {
        start_ticking_ = true;
        start_ = std::chrono::high_resolution_clock::now();
    }
    void toc() {
        if (!start_ticking_) return;
        end_ = std::chrono::high_resolution_clock::now();
        start_ticking_ = false;
        t = std::chrono::duration<double, std::milli>(end_ - start_).count();
        //std::cout << "Time: " << t << " ms" << std::endl;
    }
    double t;
private:
    bool start_ticking_ = false;
    std::chrono::time_point<std::chrono::high_resolution_clock> start_;
    std::chrono::time_point<std::chrono::high_resolution_clock> end_;
};
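// Usage (as in the inference loop below):
//   timer.tic();
//   ... work to measure ...
//   timer.toc();
//   double ms = timer.t;   // elapsed time in milliseconds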
/*
 * NOTE: unified memory (cudaMallocManaged) is used here, so the returned
 * buffer is addressable from both the host and the device.
 */
float* allocateMemory(DimsCHW dims, const char* info)
{
    float* ptr = nullptr;
    std::cout << "Allocate memory: " << info << std::endl;
    const size_t size = BATCH_SIZE * dims.c() * dims.h() * dims.w();
    // Do not wrap the allocation itself in assert(), or it would be compiled
    // out (together with the allocation) in release builds.
    cudaError_t err = cudaMallocManaged(&ptr, size * sizeof(float));
    assert(err == cudaSuccess);
    return ptr;
}
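// Because the blobs come from cudaMallocManaged, the output pointer can be
// handed to TensorRT as a device binding and then read directly on the CPU in
// the detection loop below, without an explicit cudaMemcpy back to the host.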
void loadImg(cv::Mat &input1, cv::Mat &input2, int re_width, int re_height,
             float* data_uniform, const float3 mean, const float scale)
{
    cv::Mat dst1;
    cv::Mat dst2;
    cv::resize(input1, dst1, cv::Size(re_width, re_height), 0.0, 0.0, cv::INTER_LINEAR);
    cv::resize(input2, dst2, cv::Size(re_width, re_height), 0.0, 0.0, cv::INTER_LINEAR);

    const int offset_g   = re_width * re_height;        // start of the G plane
    const int offset_r   = re_width * re_height * 2;    // start of the R plane
    const int offset_img = re_width * re_height * 3;    // start of the second image

    for (int i = 0; i < re_height; ++i)
    {
        const unsigned char* line1 = dst1.ptr<unsigned char>(i);
        const unsigned char* line2 = dst2.ptr<unsigned char>(i);
        const int line_offset = i * re_width;
        for (int j = 0; j < re_width; ++j)
        {
            // first image: B, G, R planes
            data_uniform[line_offset + j]            = (float)(line1[j * 3]     - mean.x) * scale;
            data_uniform[offset_g + line_offset + j] = (float)(line1[j * 3 + 1] - mean.y) * scale;
            data_uniform[offset_r + line_offset + j] = (float)(line1[j * 3 + 2] - mean.z) * scale;
            // second image: B, G, R planes
            data_uniform[offset_img + line_offset + j]            = (float)(line2[j * 3]     - mean.x) * scale;
            data_uniform[offset_img + offset_g + line_offset + j] = (float)(line2[j * 3 + 1] - mean.y) * scale;
            data_uniform[offset_img + offset_r + line_offset + j] = (float)(line2[j * 3 + 2] - mean.z) * scale;
        }
    }
}
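// loadImg() packs the two resized frames into a single planar NCHW batch:
//   index = image * (3 * H * W) + channel * (H * W) + row * W + col
// Channels stay in OpenCV's BGR order; the caller below passes the usual
// Caffe BGR means (103.94, 116.78, 123.68) and a scale of 0.017.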
// Capture thread: reads frames from the video and feeds the shared queue.
void readPicture()
{
    cv::VideoCapture cap("testVideo/test.avi");
    cv::Mat image;
    while (cap.isOpened())
    {
        cap >> image;
        if (image.empty())
        {
            // End of stream: push an empty frame as a sentinel so the
            // consumer loop in main() knows to stop.
            imageBuffer->add(image);
            break;
        }
        // Clone so the queued frame keeps its own pixel buffer.
        imageBuffer->add(image.clone());
    }
}
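// readPicture() runs detached (started in main below); decoding therefore
// overlaps with inference, and the bounded queue keeps the reader from
// running arbitrarily far ahead of the consumer.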
int main(int argc, char* argv[])
{
    std::vector<std::string> output_vector = {OUTPUT_BLOB_NAME};
    TensorNet tensorNet;
    tensorNet.LoadNetwork(model, weight, INPUT_BLOB_NAME, output_vector, BATCH_SIZE);
    DimsCHW dimsData = tensorNet.getTensorDims(INPUT_BLOB_NAME);
    DimsCHW dimsOut  = tensorNet.getTensorDims(OUTPUT_BLOB_NAME);

    // The managed input blob is allocated for completeness, but the raw
    // device buffer imgCUDA below is what is actually bound as the input.
    float* data = allocateMemory(dimsData, "input blob");
    std::cout << "allocate data" << std::endl;
    float* output = allocateMemory(dimsOut, "output blob");
    std::cout << "allocate output" << std::endl;

    const int height = 304;
    const int width  = 304;
    cv::Mat frame, srcImg, fl_frame;

    // Host staging buffer and device input buffer for the 2-image batch,
    // allocated once and reused for every frame.
    const size_t size = width * height * sizeof(float3);
    void* imgCUDA = nullptr;
    if (CUDA_FAILED(cudaMalloc(&imgCUDA, 2 * size)))
    {
        std::cout << "Cuda memory allocation error occurred." << std::endl;
        return -1;
    }
    void* imgData = malloc(2 * size);
    memset(imgData, 0, 2 * size);

    Timer timer;
    int count = 0;
    std::thread readTread(readPicture);
    readTread.detach();
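    // Inference loop: consume frames from the capture thread, build a
    // 2-image batch (original + horizontally flipped frame), run the network,
    // and draw/write the detections on the original frame.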
    cv::VideoWriter writer;
    while (1)
    {
        imageBuffer->consume(frame);
        if (frame.empty())
            break;                       // end-of-stream sentinel from readPicture()
        count++;

        auto start = std::chrono::system_clock::now();
        srcImg = frame.clone();
        cv::resize(frame, frame, cv::Size(width, height));
        cv::flip(frame, fl_frame, 1);

        // Preprocess both frames into the host staging buffer, then copy the
        // whole batch to the device input buffer.
        loadImg(frame, fl_frame, width, height, (float*)imgData,
                make_float3(103.94, 116.78, 123.68), 0.017);
        cudaMemcpy(imgCUDA, imgData, 2 * size, cudaMemcpyHostToDevice);

        void* buffers[] = { imgCUDA, output };
        timer.tic();
        tensorNet.imageInference(buffers, output_vector.size() + 1, BATCH_SIZE);
        timer.toc();
        double msTime = timer.t;
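        // detection_out follows the Caffe SSD DetectionOutput layout: 7 floats
        // per detection [image_id, label, confidence, xmin, ymin, xmax, ymax],
        // with box coordinates normalized to [0, 1]; the loop below stops at
        // the first entry whose label is -1. With BATCH_SIZE = 2 the results
        // for the original and the flipped frame share this buffer
        // (distinguished by image_id), and all of them are drawn on srcImg.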
        for (int k = 0; k < 100; k++)
        {
            if (output[7 * k + 1] == -1)
                break;
            float classIndex = output[7 * k + 1];
            float confidence = output[7 * k + 2];
            float xmin = output[7 * k + 3];
            float ymin = output[7 * k + 4];
            float xmax = output[7 * k + 5];
            float ymax = output[7 * k + 6];
            //std::cout << classIndex << " , " << confidence << " , " << xmin << " , " << ymin << " , " << xmax << " , " << ymax << std::endl;
            int x1 = static_cast<int>(xmin * srcImg.cols);
            int y1 = static_cast<int>(ymin * srcImg.rows);
            int x2 = static_cast<int>(xmax * srcImg.cols);
            int y2 = static_cast<int>(ymax * srcImg.rows);
            cv::rectangle(srcImg, cv::Rect(cv::Point(x1, y1), cv::Point(x2, y2)), cv::Scalar(255, 0, 255), 1);
        }

        cv::Size size1(srcImg.cols, srcImg.rows);

        auto end = std::chrono::system_clock::now();
        std::chrono::duration<double> elapsed_seconds = end - start;
        double duration = elapsed_seconds.count() * 1000.0;
        std::cout << "Inference time: " << msTime << " ms, total frame time: " << duration << " ms" << std::endl;

        if (count == 1)
        {
            // Open the writer once the output frame size is known.
            const char* fname = "result.wmv";
            printf("%s\n", fname);
            writer.open(fname, cv::VideoWriter::fourcc('M', 'P', '4', 'V'), 20, size1);
        }
        writer << srcImg;
    }

    writer.release();
    free(imgData);
    cudaFree(imgCUDA);
    cudaFree(data);
    cudaFree(output);
    tensorNet.destroy();
    return 0;
}
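/*
 * For reference, a minimal sketch of the bounded blocking queue interface used
 * above. This is an assumption based only on the calls made in this file
 * (constructor(capacity, dropFrame), add(), consume()); the real
 * ConsumerProducerQueue in imageBuffer.h may differ, so the sketch is kept in
 * a comment rather than as a second definition.
 *
 *   template <typename T>
 *   class ConsumerProducerQueue {
 *   public:
 *       ConsumerProducerQueue(size_t capacity, bool dropFrame)
 *           : capacity_(capacity), dropFrame_(dropFrame) {}
 *
 *       void add(const T& item) {
 *           std::unique_lock<std::mutex> lock(mutex_);
 *           if (dropFrame_ && queue_.size() >= capacity_)
 *               return;                                   // drop when full
 *           notFull_.wait(lock, [&] { return queue_.size() < capacity_; });
 *           queue_.push(item);
 *           notEmpty_.notify_one();
 *       }
 *
 *       void consume(T& item) {
 *           std::unique_lock<std::mutex> lock(mutex_);
 *           notEmpty_.wait(lock, [&] { return !queue_.empty(); });
 *           item = queue_.front();
 *           queue_.pop();
 *           notFull_.notify_one();
 *       }
 *
 *   private:
 *       std::queue<T> queue_;                          // requires <queue>
 *       std::mutex mutex_;                             // requires <mutex>
 *       std::condition_variable notFull_, notEmpty_;   // requires <condition_variable>
 *       size_t capacity_;
 *       bool dropFrame_;
 *   };
 */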