nixeneko · August 7, 2018 08:29
diff --git a/face_detection.py b/face_detection.py
 import cv2 as cv
 import argparse
 import sys
 import numpy as np

 # Weights and configuration files handed to cv.dnn.readNetFromDarknet below.
 MODEL = "yolov3-face_final.weights"
 CFG = "yolov3-face.cfg"
 SCALE = 0.00392 # approximately 1/255, scale factor applied to the input
 INP_SHAPE = (416, 416) # input size, read as (width, height) by the main loop
 # MEAN and RGB are forwarded to cv.dnn.blobFromImage as its 4th and 5th
 # positional arguments.
 MEAN = 0
 RGB = True
        
 # Load a network
 # Alternative loader that was left disabled:
 #net = cv.dnn.readNet(args.model, args.config, "darknet")
 net = cv.dnn.readNetFromDarknet(CFG, MODEL)
 # Backend constants listed for reference:
 #backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
 net.setPreferableBackend(cv.dnn.DNN_BACKEND_DEFAULT)
 # Target constants listed for reference:
 #targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)
 net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

 # Both thresholds are read by postprocess(): detections must score above
 # confThreshold, and both values are passed to cv.dnn.NMSBoxes.
 confThreshold = 0.5 # Confidence threshold
 nmsThreshold = 0.4  # Non-maximum suppression threshold


 def getOutputsNames(net):
    """Return the names of the network's unconnected output layers.

    Args:
        net: Object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()``; the latter yields 1-based layer
            ids.

    Returns:
        A list with one layer name per unconnected output, in the order the
        ids were reported.
    """
    layersNames = net.getLayerNames()
    # The ids arrive either nested ([[200], [227]]) or flat ([200, 227])
    # depending on the OpenCV build; flattening accepts both, whereas the
    # former ``i[0]`` indexing failed on the flat form.
    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
    # Ids are 1-based, the name list is 0-based.
    return [layersNames[int(layerId) - 1] for layerId in outLayerIds]

 def postprocess(frame, outs):
    """Draw the detections found in ``outs`` onto ``frame`` (modified in place).

    Rows whose best class score exceeds the module-level ``confThreshold``
    become pixel-space boxes, overlapping boxes are filtered with
    ``cv.dnn.NMSBoxes`` (using ``confThreshold`` and ``nmsThreshold``), and
    every surviving box is drawn together with its score.

    Reads the module-level ``net`` to inspect the type of its last layer.

    Args:
        frame: Image to annotate; ``frame.shape[0]`` and ``frame.shape[1]``
            are used as height and width when scaling box coordinates.
        outs: Iterable of network outputs; each output is iterated row by
            row, one row per candidate detection.

    Raises:
        SystemExit: If the last layer of ``net`` is not of type 'Region'.
    """
    frameHeight = frame.shape[0]
    frameWidth = frame.shape[1]

    def drawPred(classId, conf, left, top, right, bottom):
        """Draw one box and its '%.2f' confidence label; classId is unused."""
        left = int(left)
        top = int(top)
        right = int(right)
        bottom = int(bottom)
        # Draw a bounding box.
        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))

        label = '%.2f' % conf

        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        # Push the label down when the box starts too close to the top edge
        # for the text to fit above it.
        top = max(top, labelSize[1])
        cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

    layerNames = net.getLayerNames()
    lastLayerId = net.getLayerId(layerNames[-1])
    lastLayer = net.getLayer(lastLayerId)

    # Only 'Region' outputs are understood; bail out early on anything else.
    if lastLayer.type != 'Region':
        print('Unknown output layer type: ' + lastLayer.type)
        # sys.exit is always available, unlike the site-provided exit();
        # a non-zero status marks the run as failed.
        sys.exit(1)

    classIds = []
    confidences = []
    boxes = []

    # Each row is read as [center_x, center_y, width, height, <index 4>,
    # class scores...]. The first four values are scaled by the frame size,
    # so they are presumably relative coordinates; index 4 is not used here
    # (presumably the objectness score -- TODO confirm).
    for out in outs:
        for detection in out:
            scores = detection[5:]
            classId = np.argmax(scores)
            confidence = scores[classId]
            if confidence > confThreshold:
                center_x = int(detection[0] * frameWidth)
                center_y = int(detection[1] * frameHeight)
                width = int(detection[2] * frameWidth)
                height = int(detection[3] * frameHeight)
                # Truncate so all four box entries are integer pixels;
                # ``/`` alone would leave left/top as floats.
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(classId)
                confidences.append(float(confidence))
                boxes.append([left, top, width, height])

    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
    # The kept indices come back nested (Nx1), flat, or as an empty tuple
    # depending on the OpenCV build; flatten so every form is handled.
    for i in np.asarray(indices, dtype=int).reshape(-1):
        left, top, width, height = boxes[i]
        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

 # Process inputs
 winName = 'Deep learning object detection in OpenCV'
 cv.namedWindow(winName, cv.WINDOW_NORMAL)

 # Neither the blob size nor the output layer names change between frames,
 # so they are looked up once instead of on every iteration.
 inpWidth, inpHeight = INP_SHAPE
 outNames = getOutputsNames(net)

 cap = cv.VideoCapture(0)
 try:
    # Keep processing frames for as long as waitKey reports a negative value
    # (no key press).
    while cv.waitKey(1) < 0:
        hasFrame, frame = cap.read()
        if not hasFrame:
            # No frame could be read: wait for a key press, then stop.
            cv.waitKey()
            break

        # Create a 4D blob from a frame.
        blob = cv.dnn.blobFromImage(frame, SCALE, (inpWidth, inpHeight), MEAN, RGB, crop=False)

        # Run a model
        net.setInput(blob)
        outs = net.forward(outNames)

        # Draws the detections directly onto ``frame``.
        postprocess(frame, outs)

        # Put efficiency information.
        t, _ = net.getPerfProfile()
        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        cv.imshow(winName, frame)
 finally:
    # Always hand the capture device back and close the window, including
    # when the loop is left through an exception.
    cap.release()
    cv.destroyAllWindows()
	import cv2 as cv
	import argparse
	import sys
	import numpy as np

	# Weights and configuration files handed to cv.dnn.readNetFromDarknet below.
	MODEL = "yolov3-face_final.weights"
	CFG = "yolov3-face.cfg"
	SCALE = 0.00392 # approximately 1/255, scale factor applied to the input
	INP_SHAPE = (416, 416) # input size, read as (width, height) by the main loop
	# MEAN and RGB are forwarded to cv.dnn.blobFromImage as its 4th and 5th
	# positional arguments.
	MEAN = 0
	RGB = True

	# Load a network
	# Alternative loader that was left disabled:
	#net = cv.dnn.readNet(args.model, args.config, "darknet")
	net = cv.dnn.readNetFromDarknet(CFG, MODEL)
	# Backend constants listed for reference:
	#backends = (cv.dnn.DNN_BACKEND_DEFAULT, cv.dnn.DNN_BACKEND_HALIDE, cv.dnn.DNN_BACKEND_INFERENCE_ENGINE, cv.dnn.DNN_BACKEND_OPENCV)
	net.setPreferableBackend(cv.dnn.DNN_BACKEND_DEFAULT)
	# Target constants listed for reference:
	#targets = (cv.dnn.DNN_TARGET_CPU, cv.dnn.DNN_TARGET_OPENCL, cv.dnn.DNN_TARGET_OPENCL_FP16, cv.dnn.DNN_TARGET_MYRIAD)
	net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)

	# Both thresholds are read by postprocess(): detections must score above
	# confThreshold, and both values are passed to cv.dnn.NMSBoxes.
	confThreshold = 0.5 # Confidence threshold
	nmsThreshold = 0.4 # Non-maximum suppression threshold


	def getOutputsNames(net):
	    """Return the names of the network's unconnected output layers.

	    Args:
	        net: Object exposing ``getLayerNames()`` and
	            ``getUnconnectedOutLayers()``; the latter yields 1-based
	            layer ids.

	    Returns:
	        A list with one layer name per unconnected output, in the order
	        the ids were reported.
	    """
	    layersNames = net.getLayerNames()
	    # The ids arrive either nested ([[200], [227]]) or flat ([200, 227])
	    # depending on the OpenCV build; flattening accepts both, whereas
	    # plain ``i[0]`` indexing fails on the flat form.
	    outLayerIds = np.asarray(net.getUnconnectedOutLayers()).reshape(-1)
	    # Ids are 1-based, the name list is 0-based.
	    return [layersNames[int(layerId) - 1] for layerId in outLayerIds]

	def postprocess(frame, outs):
	    """Draw the detections found in ``outs`` onto ``frame`` (modified in place).

	    Rows whose best class score exceeds the module-level ``confThreshold``
	    become pixel-space boxes, overlapping boxes are filtered with
	    ``cv.dnn.NMSBoxes`` (using ``confThreshold`` and ``nmsThreshold``),
	    and every surviving box is drawn together with its score.

	    Reads the module-level ``net`` to inspect the type of its last layer.

	    Args:
	        frame: Image to annotate; ``frame.shape[0]`` and
	            ``frame.shape[1]`` are used as height and width when scaling
	            box coordinates.
	        outs: Iterable of network outputs; each output is iterated row
	            by row, one row per candidate detection.

	    Raises:
	        SystemExit: If the last layer of ``net`` is not of type 'Region'.
	    """
	    frameHeight = frame.shape[0]
	    frameWidth = frame.shape[1]

	    def drawPred(classId, conf, left, top, right, bottom):
	        """Draw one box and its '%.2f' confidence label; classId is unused."""
	        left = int(left)
	        top = int(top)
	        right = int(right)
	        bottom = int(bottom)
	        # Draw a bounding box.
	        cv.rectangle(frame, (left, top), (right, bottom), (0, 255, 0))

	        label = '%.2f' % conf

	        labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
	        # Push the label down when the box starts too close to the top
	        # edge for the text to fit above it.
	        top = max(top, labelSize[1])
	        cv.rectangle(frame, (left, top - labelSize[1]), (left + labelSize[0], top + baseLine), (255, 255, 255), cv.FILLED)
	        cv.putText(frame, label, (left, top), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

	    layerNames = net.getLayerNames()
	    lastLayerId = net.getLayerId(layerNames[-1])
	    lastLayer = net.getLayer(lastLayerId)

	    # Only 'Region' outputs are understood; bail out early on anything else.
	    if lastLayer.type != 'Region':
	        print('Unknown output layer type: ' + lastLayer.type)
	        # sys.exit is always available, unlike the site-provided exit();
	        # a non-zero status marks the run as failed.
	        sys.exit(1)

	    classIds = []
	    confidences = []
	    boxes = []

	    # Each row is read as [center_x, center_y, width, height, <index 4>,
	    # class scores...]. The first four values are scaled by the frame
	    # size, so they are presumably relative coordinates; index 4 is not
	    # used here (presumably the objectness score -- TODO confirm).
	    for out in outs:
	        for detection in out:
	            scores = detection[5:]
	            classId = np.argmax(scores)
	            confidence = scores[classId]
	            if confidence > confThreshold:
	                center_x = int(detection[0] * frameWidth)
	                center_y = int(detection[1] * frameHeight)
	                width = int(detection[2] * frameWidth)
	                height = int(detection[3] * frameHeight)
	                # Truncate so all four box entries are integer pixels;
	                # ``/`` alone would leave left/top as floats.
	                left = int(center_x - width / 2)
	                top = int(center_y - height / 2)
	                classIds.append(classId)
	                confidences.append(float(confidence))
	                boxes.append([left, top, width, height])

	    indices = cv.dnn.NMSBoxes(boxes, confidences, confThreshold, nmsThreshold)
	    # The kept indices come back nested (Nx1), flat, or as an empty tuple
	    # depending on the OpenCV build; flatten so every form is handled.
	    for i in np.asarray(indices, dtype=int).reshape(-1):
	        left, top, width, height = boxes[i]
	        drawPred(classIds[i], confidences[i], left, top, left + width, top + height)

	# Process inputs
	winName = 'Deep learning object detection in OpenCV'
	cv.namedWindow(winName, cv.WINDOW_NORMAL)

	# Neither the blob size nor the output layer names change between frames,
	# so they are looked up once instead of on every iteration.
	inpWidth, inpHeight = INP_SHAPE
	outNames = getOutputsNames(net)

	cap = cv.VideoCapture(0)
	try:
	    # Keep processing frames for as long as waitKey reports a negative
	    # value (no key press).
	    while cv.waitKey(1) < 0:
	        hasFrame, frame = cap.read()
	        if not hasFrame:
	            # No frame could be read: wait for a key press, then stop.
	            cv.waitKey()
	            break

	        # Create a 4D blob from a frame.
	        blob = cv.dnn.blobFromImage(frame, SCALE, (inpWidth, inpHeight), MEAN, RGB, crop=False)

	        # Run a model
	        net.setInput(blob)
	        outs = net.forward(outNames)

	        # Draws the detections directly onto ``frame``.
	        postprocess(frame, outs)

	        # Put efficiency information.
	        t, _ = net.getPerfProfile()
	        label = 'Inference time: %.2f ms' % (t * 1000.0 / cv.getTickFrequency())
	        cv.putText(frame, label, (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

	        cv.imshow(winName, frame)
	finally:
	    # Always hand the capture device back and close the window, including
	    # when the loop is left through an exception.
	    cap.release()
	    cv.destroyAllWindows()