# Last active July 18, 2018 21:14
from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
from preprocess import prep_image, inp_to_image
import pandas as pd
import random
import pickle as pkl
import itertools
class test_net(nn.Module):
    """Small stack of linear layers: input_size -> 5 -> (5 -> 5) * num_layers -> 2.

    No activation function is applied between the layers.
    """

    def __init__(self, num_layers, input_size):
        """Create the layers.

        Args:
            num_layers: number of 5-to-5 linear layers placed between the
                first and the last layer.
            input_size: number of elements in the flattened input.
        """
        super(test_net, self).__init__()
        self.num_layers = num_layers
        self.linear_1 = nn.Linear(input_size, 5)
        self.middle = nn.ModuleList([nn.Linear(5, 5) for _ in range(num_layers)])
        self.output = nn.Linear(5, 2)

    def forward(self, x):
        """Flatten ``x`` to one dimension and push it through every layer.

        Args:
            x: tensor with exactly ``input_size`` elements in total (all
                dimensions, including any leading one, are flattened).

        Returns:
            A tensor with 2 elements.
        """
        x = x.view(-1)
        # Apply the layers directly rather than wrapping them in a new
        # nn.Sequential on every call; the result is the same.
        x = self.linear_1(x)
        for layer in self.middle:
            x = layer(x)
        return self.output(x)
def get_test_input(input_dim, CUDA):
    """Load the bundled sample image as a network-ready tensor.

    Reads ``dog-cycle-car.png`` from the current working directory, resizes
    it to ``input_dim`` x ``input_dim`` (aspect ratio is not preserved),
    reverses the channel axis, moves channels first, scales values by 1/255
    and adds a leading axis of size 1.

    Args:
        input_dim: side length, in pixels, of the square image to produce.
        CUDA: when truthy, the tensor is moved to the GPU.

    Returns:
        A float tensor wrapped in ``Variable``.

    Raises:
        FileNotFoundError: if the sample image cannot be read.
    """
    image_path = "dog-cycle-car.png"
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with a clear message rather than inside cv2.resize.
        raise FileNotFoundError("Could not read test image {}".format(image_path))

    img = cv2.resize(img, (input_dim, input_dim))
    # Reverse the channel axis (OpenCV loads BGR) and go from H x W x C to C x H x W.
    img_ = img[:, :, ::-1].transpose((2, 0, 1))
    # Add the leading axis and scale to [0, 1]; the division also yields a
    # fresh contiguous array, which torch.from_numpy requires.
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)

    if CUDA:
        img_ = img_.cuda()
    return img_
def arg_parse():
    """Parse the command-line arguments of the detect module.

    Returns:
        The ``argparse.Namespace`` with attributes ``images``, ``det``,
        ``bs``, ``confidence``, ``nms_thresh``, ``cfgfile``, ``weightsfile``,
        ``reso`` and ``scales``.
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')

    parser.add_argument("--images", dest='images',
                        help="Image / Directory containing images to perform detection upon",
                        default="imgs", type=str)
    parser.add_argument("--det", dest='det',
                        help="Image / Directory to store detections to",
                        default="det", type=str)
    # The numeric options are converted at parse time so a malformed value is
    # rejected by argparse itself; callers that still wrap them in
    # int()/float() keep working.
    parser.add_argument("--bs", dest="bs", help="Batch size", default=1, type=int)
    parser.add_argument("--confidence", dest="confidence",
                        help="Object Confidence to filter predictions",
                        default=0.5, type=float)
    parser.add_argument("--nms_thresh", dest="nms_thresh", help="NMS Threshhold",
                        default=0.4, type=float)
    parser.add_argument("--cfg", dest='cfgfile',
                        help="Config file",
                        default="cfg/yolov3.cfg", type=str)
    parser.add_argument("--weights", dest='weightsfile',
                        help="Weights file",
                        default="yolov3.weights", type=str)
    parser.add_argument("--reso", dest='reso',
                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="416", type=str)
    parser.add_argument("--scales", dest="scales", help="Scales to use for detection",
                        default="1,2,3", type=str)

    return parser.parse_args()
def write_results(prediction, confidence, num_classes, nms = True, nms_conf = 0.4):
    """Threshold raw detections on objectness and apply class-wise NMS.

    Relies on ``unique`` and ``bbox_iou``, which are not defined in this
    file (presumably supplied by ``from util import *``).

    Args:
        prediction: tensor indexed as [image, box, attribute]. Attributes
            0-3 are read as centre-x, centre-y, width and height, attribute 4
            as the objectness score and attributes 5 .. 5+num_classes as the
            per-class scores.
        confidence: boxes whose objectness is not above this value are dropped.
        num_classes: number of class-score attributes per box.
        nms: whether to run non-maximum suppression within each class.
        nms_conf: a box is suppressed when its IoU with a higher-ranked box
            of the same class is not below this value.

    Returns:
        The int ``0`` when no box survives (callers test for an int).
        Otherwise a tensor with one row per kept detection and 8 columns:
        image index within the batch, x1, y1, x2, y2, objectness, best class
        score, best class index.
    """
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    # Nothing above the threshold anywhere in the batch.
    if torch.nonzero(prediction[:, :, 4]).numel() == 0:
        return 0

    # Convert (centre-x, centre-y, width, height) into corner coordinates.
    box_a = torch.empty_like(prediction)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)
    detections = []

    for ind in range(batch_size):
        # Select the image from the batch.
        image_pred = prediction[ind]

        # Replace the num_classes scores by the best score and its index.
        max_conf, max_conf_score = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_score = max_conf_score.float().unsqueeze(1)
        image_pred = torch.cat((image_pred[:, :5], max_conf, max_conf_score), 1)

        # Get rid of the rows zeroed out by the confidence mask.
        non_zero_ind = torch.nonzero(image_pred[:, 4]).squeeze(1)
        image_pred_ = image_pred[non_zero_ind, :].view(-1, 7)
        if image_pred_.size(0) == 0:
            # This image has no surviving boxes.
            continue

        # Get the various classes detected in the image.
        img_classes = unique(image_pred_[:, -1])

        # NMS is done class by class.
        for cls in img_classes:
            # Get the detections with one particular class.
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze(1)
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort so that the entry with the maximum objectness is at the top.
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            if nms:
                for i in range(idx):
                    # IoUs of all boxes that come after the current one.
                    # image_pred_class shrinks inside this loop, so ``i`` can
                    # run past its end; that is the signal to stop.
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
                    except (ValueError, IndexError):
                        break

                    # Zero out the later detections whose IoU reaches the threshold.
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Drop the rows that were just zeroed.
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze(1)
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # Prepend the index of the image within the batch so that all
            # detections of the batch can live in one flat tensor.
            batch_ind = image_pred_class.new_full((image_pred_class.size(0), 1), ind)
            detections.append(torch.cat((batch_ind, image_pred_class), 1))

    if not detections:
        return 0
    return torch.cat(detections)
def predict_func(model, xs):
    """Run detection on images fetched from URLs; one JSON string per input.

    Args:
        model: callable invoked as ``model(tensor, True)``; its output is
            passed to ``write_results``.
        xs: iterable of image URLs, given as ``bytes`` (decoded with
            ``.decode()``) or ``str``.

    Returns:
        A list with one JSON string per element of ``xs``. Each string
        encodes a list of ``{"bbox": [x1, y1, x2, y2], "category": int}``
        objects, with the box mapped back to the original image. An input
        that cannot be fetched or decoded, or that yields no detection,
        produces ``"[]"``.
    """
    import json
    import os
    # ``import urllib`` alone does not load the ``request`` submodule.
    import urllib.request

    # NOTE(review): hard-coded GPU selection kept from the original. It only
    # takes effect if CUDA has not been initialised yet in this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    inp_dim = 416

    def letterbox_image(img, inp_dim):
        '''resize image with unchanged aspect ratio using padding'''
        img_w, img_h = img.shape[1], img.shape[0]
        w, h = inp_dim
        scale = min(w / img_w, h / img_h)
        new_w = int(img_w * scale)
        new_h = int(img_h * scale)
        resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

        # Canvas filled with the value 128; the resized image is pasted in its centre.
        canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
        top = (h - new_h) // 2
        left = (w - new_w) // 2
        canvas[top:top + new_h, left:left + new_w, :] = resized_image
        return canvas

    def prep_image(img, inp_dim):
        """Prepare an already-decoded image for the network.

        Returns the input tensor (leading axis of size 1, channels first,
        values divided by 255), the untouched original image and its
        (width, height).
        """
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = letterbox_image(orig_im, (inp_dim, inp_dim))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        print(img_.shape, dim)
        return img_, orig_im, dim

    def rescale(array, dim):
        """Map [x1, y1, x2, y2] from letterboxed to original image coordinates."""
        # Same factor as letterbox_image: the smaller of the two ratios, so
        # portrait images are handled as well as landscape ones.
        scaling_factor = min(inp_dim / dim[0], inp_dim / dim[1])
        pad_x = (inp_dim - scaling_factor * dim[0]) / 2
        pad_y = (inp_dim - scaling_factor * dim[1]) / 2
        array[0] -= pad_x
        array[2] -= pad_x
        array[1] -= pad_y
        array[3] -= pad_y
        return array / scaling_factor

    empty_result = json.dumps([])
    results = []
    for x in xs:
        url = x.decode() if isinstance(x, bytes) else x
        try:
            with urllib.request.urlopen(url) as resp:
                payload = resp.read()
        except (OSError, ValueError):
            # Network error or malformed URL.
            # NOTE(review): the original handler body is not visible in this
            # file; emitting an empty result keeps len(results) == len(xs).
            # Confirm this is the desired behaviour.
            results.append(empty_result)
            continue

        image = np.asarray(bytearray(payload), dtype="uint8")
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        if image is None:
            # The payload was not a decodable image.
            results.append(empty_result)
            continue

        inp_tensor, orig_im, dim = prep_image(image, inp_dim)
        inp_tensor = inp_tensor.cuda()
        with torch.no_grad():
            result = model(inp_tensor, True)
        prediction = write_results(result, 0.5, 80, nms=True, nms_conf=0.4)
        if isinstance(prediction, int):
            # write_results signals "no detections" with an int.
            results.append(empty_result)
            continue

        result_np = prediction.cpu().detach().numpy()
        result_string = json.dumps([
            {'bbox': rescale(single_res[1:5], dim).tolist(), 'category': int(single_res[-1])}
            for single_res in result_np
        ])
        results.append(result_string)
    return results
# Script entry point: parse the CLI options, build the Darknet model, time a
# call to predict_func and link a Clipper model, then run batched detection
# over the input images and map the boxes back to original image coordinates.
# NOTE(review): this section has lost its indentation and a number of
# statements (see the individual notes below), so it cannot run as shown.
if __name__ == '__main__':
args = arg_parse()
scales = args.scales
# scales = [int(x) for x in scales.split(',')]
# args.reso = int(args.reso)
# num_boxes = [args.reso//32, args.reso//16, args.reso//8]
# scale_indices = [3*(x**2) for x in num_boxes]
# scale_indices = list(itertools.accumulate(scale_indices, lambda x,y : x+y))
# li = []
# i = 0
# for scale in scale_indices:
# li.extend(list(range(i, scale)))
# i = scale
# scale_indices = li
images = args.images
# NOTE(review): the expression below is truncated - presumably int(args.bs); confirm.
batch_size = int(
confidence = float(args.confidence)
nms_thesh = float(args.nms_thresh)
start = 0
CUDA = torch.cuda.is_available()
num_classes = 80
classes = load_classes('data/coco.names')
#Set up the neural network
print("Loading network.....")
model = Darknet(args.cfgfile)
# NOTE(review): args.weightsfile is never used in the visible code - a
# weight-loading call was presumably lost before this message; confirm.
print("Network successfully loaded")
# The --reso option overrides the network height; it must be a multiple of 32
# and larger than 32.
model.net_info["height"] = args.reso
inp_dim = int(model.net_info["height"])
assert inp_dim % 32 == 0
assert inp_dim > 32
print("Loading to CUDA")
#If there's a GPU availible, put the model on GPU
# NOTE(review): the body of this `if` is missing - presumably it moves the
# model to the GPU. The eval-mode call announced by the next comment is
# missing as well.
if CUDA:
#Set the model in evaluation mode
print("Model loaded to CUDA")
# NOTE(review): the list holds a single empty string - the sample URL appears
# to have been stripped.
xs = ['']
import time
print("start predict")
start = time.time()
#print(predict_func(model, xs))
print("time", time.time() - start)
import sys
from clipper_admin import ClipperConnection, DockerContainerManager
from clipper_admin.deployers.pytorch import deploy_pytorch_model
clipper_conn = ClipperConnection(DockerContainerManager())
# NOTE(review): the opencv-python version pin is empty, and neither
# pkgs_to_install nor deploy_pytorch_model is used by any visible statement -
# the deploy call was presumably lost; confirm.
pkgs_to_install = ['numpy', 'opencv-python==', 'pillow', 'pandas']
clipper_conn.link_model_to_app(app_name='scalabel', model_name='yolo')
import sys
xs = ['']
import time
# Time one direct call to predict_func.
print("start predict")
start = time.time()
print(predict_func(model, xs))
print("time", time.time() - start)
import sys
read_dir = time.time()
#Detection phase
# Collect the .png/.jpeg/.jpg files found in the `images` directory.
# NOTE(review): the `try:` that pairs with the two `except` clauses below is
# missing, and so is whatever followed the "No file or directory" message.
imlist = [osp.join(osp.realpath('.'), images, img) for img in os.listdir(images) if os.path.splitext(img)[1] == '.png' or os.path.splitext(img)[1] =='.jpeg' or os.path.splitext(img)[1] =='.jpg']
except NotADirectoryError:
# `images` is not a directory: treat it as a single image path.
imlist = []
imlist.append(osp.join(osp.realpath('.'), images))
except FileNotFoundError:
print ("No file or directory with the name {}".format(images))
# NOTE(review): the body of this `if` is missing - presumably it creates the
# output directory; confirm.
if not os.path.exists(args.det):
load_batch = time.time()
# At this scope prep_image is the function imported from `preprocess` (the
# one defined inside predict_func is local to it). Each result is unpacked
# below as (input tensor, original image, image dimensions).
batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
im_batches = [x[0] for x in batches]
orig_ims = [x[1] for x in batches]
im_dim_list = [x[2] for x in batches]
# repeat(1,2) tiles each row twice along its second axis - presumably giving
# (w, h, w, h) per image; confirm against preprocess.prep_image.
im_dim_list = torch.FloatTensor(im_dim_list).repeat(1,2)
if CUDA:
im_dim_list = im_dim_list.cuda()
# One extra, partial batch is needed when the image count is not a multiple
# of batch_size.
leftover = 0
if (len(im_dim_list) % batch_size):
leftover = 1
if batch_size != 1:
num_batches = len(imlist) // batch_size + leftover
# NOTE(review): the expression below is truncated - presumably it concatenates
# slices of im_batches, one slice per batch; confirm.
im_batches = [[i*batch_size : min((i + 1)*batch_size,
len(im_batches))])) for i in range(num_batches)]
i = 0
write = False
# One forward pass on the bundled test image before the timed loop starts.
model(get_test_input(inp_dim, CUDA), CUDA)
start_det_loop = time.time()
# NOTE(review): objs starts as a dict but is rebound to a list inside the loop.
objs = {}
for batch in im_batches:
#load the image
start = time.time()
if CUDA:
batch = batch.cuda()
#Apply offsets to the result predictions
#Tranform the predictions as described in the YOLO paper
#flatten the prediction vector
# B x (bbox cord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
# Put every proposed box as a row.
with torch.no_grad():
prediction = model(Variable(batch), CUDA)
# prediction = prediction[:,scale_indices]
#get the boxes with object confidence > threshold
#Convert the cordinates to absolute coordinates
#perform NMS on these boxes, and save the results
#I could have done NMS and saving seperately to have a better abstraction
#But both these operations require looping, hence
#clubbing these ops in one loop instead of two.
#loops are slower than vectorised operations.
prediction = write_results(prediction, confidence, num_classes, nms = True, nms_conf = nms_thesh)
# An int result is treated as "nothing detected in this batch".
# NOTE(review): a `continue` presumably followed `i += 1`; without it the
# statements below would run on the int.
if type(prediction) == int:
i += 1
end = time.time()
# print(end - start)
# Shift the per-batch image index in column 0 to an index over all images.
prediction[:,0] += i*batch_size
# First batch with detections initialises `output`; later ones are appended.
# NOTE(review): the `else:` branch marker is missing and the concatenation
# below is truncated - presumably torch.cat((output, prediction)); confirm.
if not write:
output = prediction
write = 1
output =,prediction))
# Report, for every image of this batch, the time taken and the class names
# of its detections.
for im_num, image in enumerate(imlist[i*batch_size: min((i + 1)*batch_size, len(imlist))]):
im_id = i*batch_size + im_num
objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start)/batch_size))
print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
i += 1
# NOTE(review): the body of this `if` is missing, and so is the `try:` (with
# its body) that pairs with `except NameError` - presumably a reference to
# `output`, which stays undefined when no batch produced detections.
if CUDA:
except NameError:
print("No detections were made")
# Pick, for every detection row, the dimensions of the image it belongs to.
im_dim_list = torch.index_select(im_dim_list, 0, output[:,0].long())
# Per-row scale: the smallest of inp_dim / dimension over that row.
scaling_factor = torch.min(inp_dim/im_dim_list,1)[0].view(-1,1)
# Subtract the padding offsets from columns 1/3 and 2/4, then divide by the
# scale to return to original image coordinates.
output[:,[1,3]] -= (inp_dim - scaling_factor*im_dim_list[:,0].view(-1,1))/2
output[:,[2,4]] -= (inp_dim - scaling_factor*im_dim_list[:,1].view(-1,1))/2
output[:,1:5] /= scaling_factor
# Clamp every box to the bounds of its image.
for i in range(output.shape[0]):
output[i, [1,3]] = torch.clamp(output[i, [1,3]], 0.0, im_dim_list[i,0])
output[i, [2,4]] = torch.clamp(output[i, [2,4]], 0.0, im_dim_list[i,1])
output_recast = time.time()
class_load = time.time()
# NOTE(review): pickle.load executes arbitrary code from the file - only load
# a "pallete" file from a trusted source. The file handle is never closed.
colors = pkl.load(open("pallete", "rb"))
draw = time.time()
def write(x, batches, results):
    """Draw one detection (bounding box plus class label) onto its image.

    Reads the module-level ``classes`` (class names) and ``colors`` (palette).

    Args:
        x: one detection row. ``x[0]`` is the index of the image in
            ``results``, ``x[1:3]`` and ``x[3:5]`` are the two corners of the
            box and ``x[-1]`` is the class index.
        batches: unused; kept so existing callers do not break.
        results: sequence of images; the selected one is drawn on in place.

    Returns:
        The image that was drawn on.
    """
    # OpenCV expects plain Python ints for point coordinates, not 0-dim
    # tensors; int() truncates exactly like the former .int() call.
    c1 = tuple(int(v) for v in x[1:3])
    c2 = tuple(int(v) for v in x[3:5])
    img = results[int(x[0])]
    cls = int(x[-1])
    label = "{0}".format(classes[cls])
    color = random.choice(colors)

    # Outline of the detected object.
    cv2.rectangle(img, c1, c2, color, 1)

    # Filled rectangle behind the label, sized to the rendered text.
    t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
    c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
    cv2.rectangle(img, c1, c2, color, -1)
    cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
    return img
# Draw every detection onto its source image; `write` modifies the images
# held in `orig_ims` in place.
for detection in output:
    write(detection, im_batches, orig_ims)

# One output path per input image: "<args.det>/det_<input file name>".
det_names = pd.Series(imlist).apply(lambda path: "{}/det_{}".format(args.det, path.split("/")[-1]))
for det_path, annotated in zip(det_names, orig_ims):
    cv2.imwrite(det_path, annotated)

end = time.time()

# Timing summary, one row per stage of the run.
row_format = "{:25s}: {:2.3f}"
print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
print(row_format.format("Reading addresses", load_batch - read_dir))
print(row_format.format("Loading batch", start_det_loop - load_batch))
print(row_format.format("Detection (" + str(len(imlist)) + " images)", output_recast - start_det_loop))
print(row_format.format("Output Processing", class_load - output_recast))
print(row_format.format("Drawing Boxes", end - draw))
print(row_format.format("Average time_per_img", (end - load_batch) / len(imlist)))
