import numpy as np
import cv2
import os
import time
import signal
import timeit
import sys
from termcolor import colored

# Open the default webcam (device 0)
cap = cv2.VideoCapture(0)
# Config
YOLO_DIRECTORY = "../models"   # folder holding coco.names, yolov3.cfg, yolov3.weights
CONFIDENCE = 0.36              # minimum class probability to keep a detection
THRESHOLD = 0.22               # non-maxima suppression overlap threshold
ACTIVATION_RANGE = 800         # NOTE: defined but never used below
# load the COCO class labels our YOLO model was trained on
labelsPath = os.path.sep.join([YOLO_DIRECTORY, "coco.names"])
LABELS = open(labelsPath).read().strip().split("\n")

# initialize a list of colors to represent each possible class label
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

# derive the paths to the YOLO weights and model configuration
weightsPath = os.path.sep.join([YOLO_DIRECTORY, "yolov3.weights"])
configPath = os.path.sep.join([YOLO_DIRECTORY, "yolov3.cfg"])
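# Optional sketch (not in the original gist): fail fast with a readable
# message when a model file is missing, instead of an opaque error from
# readNetFromDarknet further down:
#
#   for p in (labelsPath, configPath, weightsPath):
#       if not os.path.isfile(p):
#           sys.exit("[ERROR] missing model file: " + p)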
# Give the camera a moment to start delivering frames
time.sleep(0.4)

# load our YOLO object detector trained on the COCO dataset (80 classes)
# and determine only the *output* layer names that we need from YOLO
print("[INFO] loading neural network from disk...")
print(cv2.__version__)
net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)
# Prefer the CUDA backend with FP16 inference; this only takes effect when
# the local OpenCV build was compiled with CUDA support (checked below)
#net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
#net.setPreferableTarget(cv2.dnn.DNN_TARGET_OPENCL)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_CUDA)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CUDA_FP16)
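# Optional sketch (not in the original gist): if the build lacks CUDA, the
# first net.forward() call may fail or silently fall back depending on the
# OpenCV version. One way to guard against that is to check the build
# information before choosing the backend:
#
#   if "CUDA:YES" not in "".join(cv2.getBuildInformation().split()):
#       net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
#       net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)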
ln = net.getLayerNames()
# getUnconnectedOutLayers() returns a column of 1-element arrays on older
# OpenCV builds and a flat array on 4.5.4+; flatten() handles both layouts
ln = [ln[i - 1] for i in np.asarray(net.getUnconnectedOutLayers()).flatten()]
# Handle Ctrl+C in the terminal: release pointers and exit cleanly
def signal_handler(sig, frame):
    print("\n[INFO] cleaning up...")
    cap.release()
    cv2.destroyAllWindows()
    sys.exit(0)

signal.signal(signal.SIGINT, signal_handler)
# Test for GPU support in the local OpenCV build
build_info = "".join(cv2.getBuildInformation().split())
if cv2.ocl.haveOpenCL():
    cv2.ocl.setUseOpenCL(True)
    print(colored("[OKAY] OpenCL is working!", "green"))
else:
    print(colored("[WARNING] OpenCL acceleration is disabled!", "yellow"))
if "CUDA:YES" in build_info:
    print(colored("[OKAY] CUDA is working!", "green"))
    #print(build_info)
else:
    print(colored("[WARNING] CUDA acceleration is disabled!", "yellow"))
print()
# loop over frames from the webcam stream
while True:
    # Capture frame-by-frame and time the whole iteration for the FPS readout
    start_time = timeit.default_timer()
    ret, frame = cap.read()
    if not ret:
        print("\n[WARNING] no frame received from the camera, stopping...")
        break
    H, W = frame.shape[:2]

    # cap.read() already returns 3-channel BGR frames, which is what the
    # drawing calls below expect, so no color conversion is needed here.
    # Wrapping in a UMat lets OpenCV route drawing ops through OpenCL.
    frame = cv2.UMat(frame)

    # Build the network input. Valid Darknet input sizes are multiples of 32
    # (e.g. 320, 416, 608). Darknet expects pixels scaled to [0, 1] (1/255)
    # and RGB channel order, hence swapRB=True.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255, (416, 416),
                                 swapRB=True, crop=False)
    net.setInput(blob)
    layerOutputs = net.forward(ln)
    boxes = []
    confidences = []
    classIDs = []

    # loop over each of the layer outputs
    for output in layerOutputs:
        # loop over each of the detections; each row is
        # [center_x, center_y, width, height, objectness, 80 class scores]
        for detection in output:
            # extract the class ID and confidence (i.e., probability)
            # of the current object detection
            scores = detection[5:]
            classID = np.argmax(scores)
            confidence = scores[classID]

            # filter out weak predictions by ensuring the detected
            # probability is greater than the minimum probability
            if confidence > CONFIDENCE:
                # scale the bounding box coordinates back relative to the
                # size of the image, keeping in mind that YOLO returns the
                # center (x, y) of the box followed by its width and
                # height, all normalized to [0, 1]
                box = detection[0:4] * np.array([W, H, W, H])
                (centerX, centerY, width, height) = box.astype("int")

                # derive the top-left corner from the center coordinates
                x = int(centerX - (width / 2))
                y = int(centerY - (height / 2))

                boxes.append([x, y, int(width), int(height)])
                confidences.append(float(confidence))
                classIDs.append(classID)
    # apply non-maxima suppression to suppress weak, overlapping
    # bounding boxes
    idxs = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE, THRESHOLD)
    # ensure at least one detection survived non-maxima suppression
    if len(idxs) > 0:
        # loop over the indexes we are keeping
        for i in np.asarray(idxs).flatten():
            # extract the bounding box coordinates
            (x, y) = (boxes[i][0], boxes[i][1])
            (w, h) = (boxes[i][2], boxes[i][3])

            # draw target dot on the frame
            #cv2.circle(frame, (int(x + w / 2), int(y + h / 5)), 5, (0, 0, 255), -1)

            # draw a bounding box rectangle and label on the frame,
            # using the per-class color picked at startup
            color = [int(c) for c in COLORS[classIDs[i]]]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)
            text = "TARGET {}%".format(int(confidences[i] * 100))
            cv2.putText(frame, text, (x, y - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
    # Display the resulting frame
    cv2.imshow('frame', frame)

    # Report per-frame latency and the implied FPS on a single status line
    elapsed = timeit.default_timer() - start_time
    sys.stdout.write("\r{1} FPS with {0} ms of processing delay \t".format(
        int(elapsed * 1000), int(1 / elapsed)))
    sys.stdout.flush()

    # quit when 'q' is pressed in the preview window
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
# When everything is done, release the capture
cap.release()
cv2.destroyAllWindows()
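# Usage notes (not part of the original gist; the filename below is an
# assumption, nothing above implies one). The script expects coco.names,
# yolov3.cfg, and yolov3.weights inside YOLO_DIRECTORY ("../models"). The
# cfg and names files ship with the Darknet repository
# (https://github.com/pjreddie/darknet), and the pretrained weights are
# commonly downloaded from https://pjreddie.com/media/files/yolov3.weights
#
#   pip install numpy opencv-python termcolor
#   python yolo_webcam.py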