Created
August 7, 2018 08:29
-
-
Save nixeneko/27c539801a714f02b5b5714adccf2ff9 to your computer and use it in GitHub Desktop.
OpenCV/DNN object detection (Darknet YOLOv3) test
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 as cv | |
import argparse | |
import sys | |
import numpy as np | |
# --- Model files (Darknet YOLOv3 face detector) ---
CFG = "yolov3-face.cfg"
MODEL = "yolov3-face_final.weights"

# --- Preprocessing parameters handed to cv.dnn.blobFromImage ---
INP_SHAPE = (416, 416)  # network input size
SCALE = 0.00392  # roughly 1/255; scale factor applied to the input pixels
MEAN = 0
RGB = True

# --- Detection thresholds ---
confThreshold = 0.5  # Confidence threshold
nmsThreshold = 0.4  # Non-maximum suppression threshold

# Load the network from the Darknet config/weights pair.
# (The generic loader cv.dnn.readNet(model, config, "darknet") is an
# alternative entry point.)
net = cv.dnn.readNetFromDarknet(CFG, MODEL)

# Other backends that could be selected here: DNN_BACKEND_HALIDE,
# DNN_BACKEND_INFERENCE_ENGINE, DNN_BACKEND_OPENCV.
net.setPreferableBackend(cv.dnn.DNN_BACKEND_DEFAULT)

# Other targets that could be selected here: DNN_TARGET_OPENCL,
# DNN_TARGET_OPENCL_FP16, DNN_TARGET_MYRIAD.
net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
def getOutputsNames(net):
    """Return the names of the network's unconnected output layers.

    Args:
        net: An object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv.dnn`` network).

    Returns:
        A list with the name of every unconnected output layer.
    """
    layersNames = net.getLayerNames()
    # Depending on the OpenCV version the ids arrive either as an Nx1 array
    # or as a flat 1-D array; flattening makes both layouts work.
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    # Layer ids are 1-based, the name list is 0-based.
    return [layersNames[int(layerId) - 1] for layerId in outLayerIds]
def postprocess(frame, outs):
    """Draw the detections found in ``outs`` onto ``frame`` (in place).

    Detections whose best class score exceeds the module-level
    ``confThreshold`` are collected, filtered with non-maximum suppression
    (``nmsThreshold``) and drawn as labelled boxes.

    Args:
        frame: Image array indexed as (height, width, ...); it is modified.
        outs: Iterable of network output blobs. Each row is read as
            [center_x, center_y, width, height, <skipped>, class scores...]
            with the box values relative to the frame size.

    Exits the process if the network's last layer is not of type 'Region'.
    """
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    def drawPred(classId, conf, left, top, right, bottom):
        """Draw one bounding box with its confidence label on ``frame``."""
        left = int(left)
        top = int(top)
        right = int(right)
        bottom = int(bottom)
        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))
        label = '%.2f' % conf
        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Keep the label inside the image when the box touches the top edge.
        top = max(top, labelSize[1])
        cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    layerNames = net.getLayerNames()
    lastLayerId = net.getLayerId(layerNames[-1])
    lastLayer = net.getLayer(lastLayerId)

    classIds = []
    confidences = []
    boxes = []
    if lastLayer.type == 'Region':
        # Network produces output blob with a shape NxC where N is a number of
        # detected objects and C is a number of classes + 4 where the first 4
        # numbers are [center_x, center_y, width, height]
        for out in outs:
            for detection in out:
                scores = detection[5:]
                classId = np.argmax(scores)
                confidence = scores[classId]
                if confidence > confThreshold:
                    center_x = int(detection[0] * frameWidth)
                    center_y = int(detection[1] * frameHeight)
                    width = int(detection[2] * frameWidth)
                    height = int(detection[3] * frameHeight)
                    # Cast to int: "/" yields floats on Python 3 and the
                    # boxes are meant to be integer pixel rectangles.
                    left = int(center_x - width / 2)
                    top = int(center_y - height / 2)
                    classIds.append(classId)
                    confidences.append(float(confidence))
                    boxes.append([left, top, width, height])
    else:
        print('Unknown output layer type: ' + lastLayer.type)
        sys.exit()

    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    # NMSBoxes returns Nx1 indices in some OpenCV versions and a flat array
    # (or an empty tuple) in others; flatten so every layout is handled.
    for i in np.array(indices).reshape(-1):
        i = int(i)
        left, top, width, height = boxes[i]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)
# Process inputs: grab frames from the default camera, run the detector on
# each one and show the annotated result until a key is pressed.
winName = 'Deep learning object detection in OpenCV'
cv.namedWindow(winName, cv.WINDOW_NORMAL)
cap = cv.VideoCapture(0)

# Loop-invariant values, computed once instead of on every frame.
inpWidth, inpHeight = INP_SHAPE
outNames = getOutputsNames(net)

try:
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        if not hasFrame:
            # No more frames: wait for a key press, then stop.
            cv.waitKey()
            break

        # Create a 4D blob from a frame.
        blob = cv.dnn.blobFromImage(frame, SCALE, (inpWidth, inpHeight), MEAN, RGB, crop=False)

        # Run a model
        net.setInput(blob)
        outs = net.forward(outNames)
        postprocess(frame, outs)

        # Put efficiency information.
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
        cv.imshow(winName, frame)
finally:
    # Always free the camera and close the window, even if a frame fails.
    cap.release()
    cv.destroyAllWindows()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment