Deep Learning Superhero challenge - Carpark System
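This script loads a YOLOv3 model converted to OpenVINO IR, detects cars in a video file (or a webcam feed), and mirrors each detection as a coloured dot on a car-park map rendered in a Tkinter window. A minimal invocation sketch, assuming the script is saved as carpark.py (a hypothetical name), the IR files sit under lrmodels/YoloV3/FP32/, and a map image named <video file>.png sits next to the video:

    python3 carpark.py -d CPU -v carpark_footage.mp4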
import sys, os, cv2, time
import numpy as np, math
from argparse import ArgumentParser
try:
    from armv7l.openvino.inference_engine import IENetwork, IEPlugin
except ImportError:
    from openvino.inference_engine import IENetwork, IEPlugin
# sudo apt-get install python3.6-tk
import tkinter as tk
# pip3 install pillow
from PIL import Image, ImageTk
import threading

m_input_size = 416
yolo_scale_13 = 13
yolo_scale_26 = 26
yolo_scale_52 = 52
classes = 80
coords = 4
num = 3
anchors = [10,13,16,30,33,23,30,61,62,45,59,119,116,90,156,198,373,326]
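# The 18 values above are the nine standard YOLOv3 anchor boxes as
# (width, height) pairs, ordered from the smallest (used by the 52x52
# output grid) to the largest (used by the 13x13 grid);
# ParseYOLOV3Output() below picks the right triple of pairs for each
# output scale via anchor_offset.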
LABELS = ("person", "bicycle", "car", "motorbike", "aeroplane", | |
"bus", "train", "truck", "boat", "traffic light", | |
"fire hydrant", "stop sign", "parking meter", "bench", "bird", | |
"cat", "dog", "horse", "sheep", "cow", | |
"elephant", "bear", "zebra", "giraffe", "backpack", | |
"umbrella", "handbag", "tie", "suitcase", "frisbee", | |
"skis", "snowboard", "sports ball", "kite", "baseball bat", | |
"baseball glove", "skateboard", "surfboard","tennis racket", "bottle", | |
"wine glass", "cup", "fork", "knife", "spoon", | |
"bowl", "banana", "apple", "sandwich", "orange", | |
"broccoli", "carrot", "hot dog", "pizza", "donut", | |
"cake", "chair", "sofa", "pottedplant", "bed", | |
"diningtable", "toilet", "tvmonitor", "laptop", "mouse", | |
"remote", "keyboard", "cell phone", "microwave", "oven", | |
"toaster", "sink", "refrigerator", "book", "clock", | |
"vase", "scissors", "teddy bear", "hair drier", "toothbrush") | |
label_text_color = (0, 0, 255) | |
label_background_color = (125, 175, 75) | |
box_color = (255, 128, 0) | |
box_thickness = 2 | |
circle_color = (0, 255, 0) | |
circle_thickness = 15 | |

def build_argparser():
    parser = ArgumentParser()
    parser.add_argument("-d", "--device", help="Specify the target device to infer on; CPU, GPU, FPGA or MYRIAD is acceptable. \
                        Sample will look for a suitable plugin for device specified (CPU by default)", default="CPU", type=str)
    parser.add_argument("-v", "--video", help="Specify video file to use", default="", type=str)
    return parser

def EntryIndex(side, lcoords, lclasses, location, entry):
    n = int(location / (side * side))
    loc = location % (side * side)
    return int(n * side * side * (lcoords + lclasses + 1) + entry * side * side + loc)
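# The flattened YOLO blob is laid out as [anchor][entry][cell]: for each of
# the `num` anchors there are (coords + classes + 1) = 85 planes of
# side*side cells. A worked example with made-up numbers: for side=13,
# anchor n=1, cell i=5, the objectness entry (entry=coords=4) lives at
#   1*13*13*85 + 4*13*13 + 5 = 14365 + 676 + 5 = 15046
#   EntryIndex(13, 4, 80, 1 * 13 * 13 + 5, 4)  # -> 15046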

class DetectionObject():
    xmin = 0
    ymin = 0
    xmax = 0
    ymax = 0
    class_id = 0
    confidence = 0.0

    def __init__(self, x, y, h, w, class_id, confidence, h_scale, w_scale):
        self.xmin = int((x - w / 2) * w_scale)
        self.ymin = int((y - h / 2) * h_scale)
        self.xmax = int(self.xmin + w * w_scale)
        self.ymax = int(self.ymin + h * h_scale)
        self.class_id = class_id
        self.confidence = confidence

def IntersectionOverUnion(box_1, box_2):
    width_of_overlap_area = min(box_1.xmax, box_2.xmax) - max(box_1.xmin, box_2.xmin)
    height_of_overlap_area = min(box_1.ymax, box_2.ymax) - max(box_1.ymin, box_2.ymin)
    area_of_overlap = 0.0
    if (width_of_overlap_area < 0.0 or height_of_overlap_area < 0.0):
        area_of_overlap = 0.0
    else:
        area_of_overlap = width_of_overlap_area * height_of_overlap_area
    box_1_area = (box_1.ymax - box_1.ymin) * (box_1.xmax - box_1.xmin)
    box_2_area = (box_2.ymax - box_2.ymin) * (box_2.xmax - box_2.xmin)
    area_of_union = box_1_area + box_2_area - area_of_overlap
    retval = 0.0
    if area_of_union <= 0.0:
        retval = 0.0
    else:
        retval = (area_of_overlap / area_of_union)
    return retval
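# Quick sanity check with hypothetical boxes: two 10x10 boxes whose corners
# are offset by 5 px overlap in a 5x5 patch, so the expected IoU is
# 25 / (100 + 100 - 25) ~= 0.143.
#   a = DetectionObject(5, 5, 10, 10, 0, 1.0, 1.0, 1.0)    # (0, 0)-(10, 10)
#   b = DetectionObject(10, 10, 10, 10, 0, 1.0, 1.0, 1.0)  # (5, 5)-(15, 15)
#   IntersectionOverUnion(a, b)                             # -> 0.142857...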

def ParseYOLOV3Output(blob, resized_im_h, resized_im_w, original_im_h, original_im_w, threshold, objects):
    out_blob_h = blob.shape[2]
    out_blob_w = blob.shape[3]
    side = out_blob_h
    anchor_offset = 0
    if len(anchors) == 18:    ## YoloV3
        if side == yolo_scale_13:
            anchor_offset = 2 * 6
        elif side == yolo_scale_26:
            anchor_offset = 2 * 3
        elif side == yolo_scale_52:
            anchor_offset = 2 * 0
    elif len(anchors) == 12:  ## tiny-YoloV3
        if side == yolo_scale_13:
            anchor_offset = 2 * 3
        elif side == yolo_scale_26:
            anchor_offset = 2 * 0
    else:                     ## unknown anchor count; fall back to the YoloV3 offsets
        if side == yolo_scale_13:
            anchor_offset = 2 * 6
        elif side == yolo_scale_26:
            anchor_offset = 2 * 3
        elif side == yolo_scale_52:
            anchor_offset = 2 * 0
    side_square = side * side
    output_blob = blob.flatten()
    for i in range(side_square):
        row = int(i / side)
        col = int(i % side)
        for n in range(num):
            obj_index = EntryIndex(side, coords, classes, n * side * side + i, coords)
            box_index = EntryIndex(side, coords, classes, n * side * side + i, 0)
            scale = output_blob[obj_index]
            if (scale < threshold):
                continue
            x = (col + output_blob[box_index + 0 * side_square]) / side * resized_im_w
            y = (row + output_blob[box_index + 1 * side_square]) / side * resized_im_h
            height = math.exp(output_blob[box_index + 3 * side_square]) * anchors[anchor_offset + 2 * n + 1]
            width = math.exp(output_blob[box_index + 2 * side_square]) * anchors[anchor_offset + 2 * n]
            for j in range(classes):
                class_index = EntryIndex(side, coords, classes, n * side_square + i, coords + 1 + j)
                prob = scale * output_blob[class_index]
                if prob < threshold:
                    continue
                obj = DetectionObject(x, y, height, width, j, prob, (original_im_h / resized_im_h), (original_im_w / resized_im_w))
                objects.append(obj)
    return objects
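# Box decode used above, i.e. the standard YOLOv3 equations (this assumes the
# RegionYolo layers in the converted IR have already applied the logistic
# activation to tx, ty, the objectness score and the class scores, which is
# why no sigmoid appears in the Python code):
#   bx = (col + tx) / side * input_w
#   by = (row + ty) / side * input_h
#   bw = exp(tw) * anchor_w
#   bh = exp(th) * anchor_h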

def main_IE_infer():
    camera_width = 320
    camera_height = 240
    fps = ""
    framepos = 0
    frame_count = 0
    vidfps = 0
    skip_frame = 0
    elapsedTime = 0
    # Both expressions reduce to m_input_size, so the frame is simply
    # stretched to 416x416 (no aspect-preserving letterboxing happens here).
    new_w = int(camera_width * m_input_size/camera_width)
    new_h = int(camera_height * m_input_size/camera_height)

    args = build_argparser().parse_args()
model_xml = "lrmodels/YoloV3/FP32/frozen_yolo_v3.xml" #<--- CPU | |
#model_xml = "lrmodels/tiny-YoloV3/FP32/frozen_tiny_yolo_v3.xml" #<--- CPU | |
model_bin = os.path.splitext(model_xml)[0] + ".bin" | |
print("model_xml =", model_xml) | |
is_video = args.video and len(args.video) > 0 | |
if is_video: | |
cap = cv2.VideoCapture(args.video) | |
camera_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) | |
camera_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) | |
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) | |
vidfps = int(cap.get(cv2.CAP_PROP_FPS)) | |
print("videosFrameCount =", str(frame_count)) | |
print("videosFPS =", str(vidfps)) | |
pass | |
else: | |
cap = cv2.VideoCapture(0) | |
cap.set(cv2.CAP_PROP_FPS, 1) | |
cap.set(cv2.CAP_PROP_FRAME_WIDTH, camera_width) | |
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, camera_height) | |
time.sleep(0.1) | |
    plugin = IEPlugin(device=args.device)
    if "CPU" in args.device:
        plugin.add_cpu_extension("lib/libcpu_extension.so")
    net = IENetwork(model=model_xml, weights=model_bin)
    input_blob = next(iter(net.inputs))
    exec_net = plugin.load(network=net)
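    # IEPlugin/IENetwork are the pre-2020 OpenVINO Python API. A rough sketch
    # of the newer IECore equivalent (for reference only, untested here):
    #   from openvino.inference_engine import IECore
    #   ie = IECore()
    #   net = ie.read_network(model=model_xml, weights=model_bin)
    #   exec_net = ie.load_network(network=net, device_name=args.device)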
    # Tkinter runs its mainloop on a background thread
    class MyTkApp(threading.Thread):
        def __init__(self):
            threading.Thread.__init__(self)
        def run(self):
            self.root = tk.Tk()
            self.root.wm_title("Carpark Analysis")
            self.root.protocol("WM_DELETE_WINDOW", self.callback)
            self.root.geometry("+750+100")  # places the window at (750, 100) on the screen
            self.root.mainloop()
        def callback(self):
            self.root.quit()
    detected_points = []
    global tkcanvas
    if is_video:
        mytk = MyTkApp()
        mytk.start()
        time.sleep(0.1)  # give the Tk thread a moment to create the window
        img_path = args.video + '.png'
        img = Image.open(img_path)
        photo = ImageTk.PhotoImage(img)
        tkcanvas = tk.Canvas(mytk.root,
                             width=img.size[0],
                             height=img.size[1])
        tkcanvas.create_image(0, 0, anchor=tk.NW, image=photo)
        tkcanvas.pack()

    global dots
    dots = []
    def update_analysis():
        global dots
        if not is_video:  # the map canvas only exists in video mode
            return
        for item in dots:
            tkcanvas.delete(item)
        dots = []
        for p in detected_points:
            x = p[0]
            y = p[1]
            r = circle_thickness // 2
            x0 = x - r
            y0 = y - r
            x1 = x + r
            y1 = y + r
            if 80 < y < 245:  # hard-coded y-band treated as the driveway of this car park
                tx = img.size[0]//2
                ty = img.size[1]//2
                dots.append(tkcanvas.create_oval(x0, y0, x1, y1, fill="#ff0000"))
                dots.append(tkcanvas.create_text(tx, ty, fill="darkblue", font="Arial 14 bold",
                                                 text="Car passing by"))
            else:
                dots.append(tkcanvas.create_oval(x0, y0, x1, y1, fill="#00ff00"))
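    # NOTE: update_analysis() mutates Tk widgets from this (main) thread while
    # mainloop() runs on the MyTkApp thread. Tkinter is not thread-safe, so a
    # more robust sketch would hand the points over to the Tk thread instead,
    # e.g. via root.after() or a queue.Queue polled from the Tk side.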
print("Starting video") | |
while cap.isOpened(): | |
t1 = time.time() | |
## Uncomment only when playing video files | |
#cap.set(cv2.CAP_PROP_POS_FRAMES, framepos) | |
ret, image = cap.read() | |
if not ret: | |
break | |
resized_image = cv2.resize(image, (new_w, new_h), interpolation = cv2.INTER_CUBIC) | |
canvas = np.full((m_input_size, m_input_size, 3), 128) | |
canvas[(m_input_size-new_h)//2:(m_input_size-new_h)//2 + new_h,(m_input_size-new_w)//2:(m_input_size-new_w)//2 + new_w, :] = resized_image | |
prepimg = canvas | |
prepimg = prepimg[np.newaxis, :, :, :] # Batch size axis add | |
prepimg = prepimg.transpose((0, 3, 1, 2)) # NHWC to NCHW | |
outputs = exec_net.infer(inputs={input_blob: prepimg}) | |
objects = [] | |
for output in outputs.values(): | |
threshold = 0.01 | |
objects = ParseYOLOV3Output(output, new_h, new_w, camera_height, camera_width, threshold, objects) | |
# Filter for person | |
#objects = list(filter(lambda obj: LABELS[obj.class_id] in ['person'], objects)) | |
objects = list(filter(lambda obj: LABELS[obj.class_id] in ['car', 'cell phone'], objects)) | |
        # Suppress overlapping boxes (simple IoU-based non-max suppression)
        FILTER = False
        if FILTER:
            objlen = len(objects)
            for i in range(objlen):
                if (objects[i].confidence == 0.0):
                    continue
                for j in range(i + 1, objlen):
                    if (IntersectionOverUnion(objects[i], objects[j]) >= 0.9):
                        objects[j].confidence = 0

        # Draw a dot at the centre of each confident detection
        new_detected_points = []
        for obj in objects:
            if obj.confidence < 0.1:
                continue
            label = obj.class_id
            confidence = obj.confidence
            if confidence > 0.1:
                label_text = LABELS[label] + " (" + "{:.1f}".format(confidence * 100) + "%)"
                center_point = (obj.xmin//2 + obj.xmax//2, obj.ymin//2 + obj.ymax//2)
                new_detected_points.append(center_point)
                cv2.circle(image, center_point, 1, circle_color, thickness=circle_thickness, lineType=8, shift=0)
                #cv2.rectangle(image, (obj.xmin, obj.ymin), (obj.xmax, obj.ymax), box_color, box_thickness)
                #cv2.putText(image, label_text, (obj.xmin, obj.ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, label_text_color, 1)
        detected_points = new_detected_points
        update_analysis()

        cv2.putText(image, fps, (camera_width - 170, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
        cv2.imshow("Result", image)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        elapsedTime = time.time() - t1
        fps = "{:.1f} FPS".format(1/elapsedTime)

        ## frame skip, video file only
        #skip_frame = int((vidfps - int(1/elapsedTime)) / int(1/elapsedTime))
        #framepos += skip_frame

        time.sleep(10)  # throttle: the map only needs to refresh occasionally

    cv2.destroyAllWindows()
    del net
    del exec_net
    del plugin

if __name__ == '__main__':
    sys.exit(main_IE_infer() or 0)