-
-
Save taka-wang/ab6c8783a910c4772817487a53a6eff7 to your computer and use it in GitHub Desktop.
YOLOv4 inference using OpenCV DNN
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""YOLOv4 video inference demo using OpenCV's DNN module.

Reads class names from ``classes.txt``, runs the network described by
``yolov4.cfg`` / ``yolov4.weights`` on every frame of ``demo.mp4`` and shows
the annotated frames in a window until a key is pressed or the video ends.
"""
import cv2
import time

# Thresholds handed to model.detect() for score filtering and NMS.
CONFIDENCE_THRESHOLD = 0.2
NMS_THRESHOLD = 0.4
# Box colours; a class uses COLORS[class_id % len(COLORS)].
COLORS = [(0, 255, 255), (255, 255, 0), (0, 255, 0), (255, 0, 0)]


def _first(value):
    """Return ``value[0]`` when ``value`` is indexable, else ``value`` itself.

    Depending on the OpenCV release, ``model.detect`` yields class ids and
    scores either as 1-element arrays or as plain scalars; this accepts both.
    """
    try:
        return value[0]
    except (TypeError, IndexError):
        return value


with open("classes.txt", "r") as f:
    class_names = [cname.strip() for cname in f]

vc = cv2.VideoCapture("demo.mp4")
if not vc.isOpened():
    raise SystemExit("failed to open demo.mp4")

net = cv2.dnn.readNet("yolov4.weights", "yolov4.cfg")
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)

model = cv2.dnn_DetectionModel(net)
model.setInputParams(size=(416, 416), scale=1/255, swapRB=True)

try:
    while cv2.waitKey(1) < 1:
        (grabbed, frame) = vc.read()
        if not grabbed:
            # End of stream (or read failure): leave the loop so cleanup runs.
            break

        # perf_counter is monotonic, so the interval cannot go negative.
        start = time.perf_counter()
        classes, scores, boxes = model.detect(frame, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
        end = time.perf_counter()

        start_drawing = time.perf_counter()
        for (classid, score, box) in zip(classes, scores, boxes):
            class_id = int(_first(classid))
            color = COLORS[class_id % len(COLORS)]
            # Fall back to the numeric id when classes.txt has no name for it.
            name = class_names[class_id] if class_id < len(class_names) else str(class_id)
            label = "%s : %f" % (name, float(_first(score)))
            cv2.rectangle(frame, box, color, 2)
            cv2.putText(frame, label, (box[0], box[1] - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)
        end_drawing = time.perf_counter()

        elapsed = end - start
        fps = 1 / elapsed if elapsed > 0 else 0.0
        fps_label = "FPS: %.2f (excluding drawing time of %.2fms)" % (fps, (end_drawing - start_drawing) * 1000)
        cv2.putText(frame, fps_label, (0, 25), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
        cv2.imshow("detections", frame)
finally:
    vc.release()
    cv2.destroyAllWindows()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <queue> | |
#include <iterator> | |
#include <sstream> | |
#include <fstream> | |
#include <iomanip> | |
#include <chrono> | |
#include <opencv2/core.hpp> | |
#include <opencv2/dnn.hpp> | |
#include <opencv2/dnn/all_layers.hpp> | |
#include <opencv2/imgproc.hpp> | |
#include <opencv2/highgui.hpp> | |
constexpr float CONFIDENCE_THRESHOLD = 0; | |
constexpr float NMS_THRESHOLD = 0.4; | |
constexpr int NUM_CLASSES = 80; | |
// colors for bounding boxes | |
const cv::Scalar colors[] = { | |
{0, 255, 255}, | |
{255, 255, 0}, | |
{0, 255, 0}, | |
{255, 0, 0} | |
}; | |
const auto NUM_COLORS = sizeof(colors)/sizeof(colors[0]); | |
int main() | |
{ | |
std::vector<std::string> class_names; | |
{ | |
std::ifstream class_file("classes.txt"); | |
if (!class_file) | |
{ | |
std::cerr << "failed to open classes.txt\n"; | |
return 0; | |
} | |
std::string line; | |
while (std::getline(class_file, line)) | |
class_names.push_back(line); | |
} | |
cv::VideoCapture source("demo.mp4"); | |
auto net = cv::dnn::readNetFromDarknet("yolov4.cfg", "yolov4.weights"); | |
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA); | |
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA); | |
// net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV); | |
// net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU); | |
auto output_names = net.getUnconnectedOutLayersNames(); | |
cv::Mat frame, blob; | |
std::vector<cv::Mat> detections; | |
while(cv::waitKey(1) < 1) | |
{ | |
source >> frame; | |
if (frame.empty()) | |
{ | |
cv::waitKey(); | |
break; | |
} | |
auto total_start = std::chrono::steady_clock::now(); | |
cv::dnn::blobFromImage(frame, blob, 0.00392, cv::Size(608, 608), cv::Scalar(), true, false, CV_32F); | |
net.setInput(blob); | |
auto dnn_start = std::chrono::steady_clock::now(); | |
net.forward(detections, output_names); | |
auto dnn_end = std::chrono::steady_clock::now(); | |
std::vector<int> indices[NUM_CLASSES]; | |
std::vector<cv::Rect> boxes[NUM_CLASSES]; | |
std::vector<float> scores[NUM_CLASSES]; | |
for (auto& output : detections) | |
{ | |
const auto num_boxes = output.rows; | |
for (int i = 0; i < num_boxes; i++) | |
{ | |
auto x = output.at<float>(i, 0) * frame.cols; | |
auto y = output.at<float>(i, 1) * frame.rows; | |
auto width = output.at<float>(i, 2) * frame.cols; | |
auto height = output.at<float>(i, 3) * frame.rows; | |
cv::Rect rect(x - width/2, y - height/2, width, height); | |
for (int c = 0; c < NUM_CLASSES; c++) | |
{ | |
auto confidence = *output.ptr<float>(i, 5 + c); | |
if (confidence >= CONFIDENCE_THRESHOLD) | |
{ | |
boxes[c].push_back(rect); | |
scores[c].push_back(confidence); | |
} | |
} | |
} | |
} | |
for (int c = 0; c < NUM_CLASSES; c++) | |
cv::dnn::NMSBoxes(boxes[c], scores[c], 0.0, NMS_THRESHOLD, indices[c]); | |
for (int c= 0; c < NUM_CLASSES; c++) | |
{ | |
for (size_t i = 0; i < indices[c].size(); ++i) | |
{ | |
const auto color = colors[c % NUM_COLORS]; | |
auto idx = indices[c][i]; | |
const auto& rect = boxes[c][idx]; | |
cv::rectangle(frame, cv::Point(rect.x, rect.y), cv::Point(rect.x + rect.width, rect.y + rect.height), color, 3); | |
std::ostringstream label_ss; | |
label_ss << class_names[c] << ": " << std::fixed << std::setprecision(2) << scores[c][idx]; | |
auto label = label_ss.str(); | |
int baseline; | |
auto label_bg_sz = cv::getTextSize(label.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline); | |
cv::rectangle(frame, cv::Point(rect.x, rect.y - label_bg_sz.height - baseline - 10), cv::Point(rect.x + label_bg_sz.width, rect.y), color, cv::FILLED); | |
cv::putText(frame, label.c_str(), cv::Point(rect.x, rect.y - baseline - 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(0, 0, 0)); | |
} | |
} | |
auto total_end = std::chrono::steady_clock::now(); | |
float inference_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(dnn_end - dnn_start).count(); | |
float total_fps = 1000.0 / std::chrono::duration_cast<std::chrono::milliseconds>(total_end - total_start).count(); | |
std::ostringstream stats_ss; | |
stats_ss << std::fixed << std::setprecision(2); | |
stats_ss << "Inference FPS: " << inference_fps << ", Total FPS: " << total_fps; | |
auto stats = stats_ss.str(); | |
int baseline; | |
auto stats_bg_sz = cv::getTextSize(stats.c_str(), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, 1, &baseline); | |
cv::rectangle(frame, cv::Point(0, 0), cv::Point(stats_bg_sz.width, stats_bg_sz.height + 10), cv::Scalar(0, 0, 0), cv::FILLED); | |
cv::putText(frame, stats.c_str(), cv::Point(0, stats_bg_sz.height + 5), cv::FONT_HERSHEY_COMPLEX_SMALL, 1, cv::Scalar(255, 255, 255)); | |
cv::namedWindow("output"); | |
cv::imshow("output", frame); | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment