from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
from preprocess import prep_image, inp_to_image
import pandas as pd
import random
import pickle as pkl
import itertools
class test_net(nn.Module):
    def __init__(self, num_layers, input_size):
        super(test_net, self).__init__()
        self.num_layers = num_layers
        self.linear_1 = nn.Linear(input_size, 5)
        self.middle = nn.ModuleList([nn.Linear(5, 5) for x in range(num_layers)])
        self.output = nn.Linear(5, 2)

    def forward(self, x):
        x = x.view(-1)
        fwd = nn.Sequential(self.linear_1, *self.middle, self.output)
        return fwd(x)
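
# Note: test_net above appears to be a small scratch module; it is never
# instantiated or referenced anywhere else in this script.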
def get_test_input(input_dim, CUDA):
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (input_dim, input_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1))  # BGR -> RGB, HWC -> CHW
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)
    if CUDA:
        img_ = img_.cuda()
    return img_
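
# get_test_input appears to serve as a warm-up/sanity pass before the timed
# detection loop further below; it assumes a test image named
# "dog-cycle-car.png" exists in the working directory.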
def arg_parse():
    """
    Parse arguments to the detect module
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
    parser.add_argument("--images", dest='images',
                        help="Image / Directory containing images to perform detection upon",
                        default="imgs", type=str)
    parser.add_argument("--det", dest='det',
                        help="Image / Directory to store detections to",
                        default="det", type=str)
    parser.add_argument("--bs", dest="bs", help="Batch size", default=1)
    parser.add_argument("--confidence", dest="confidence",
                        help="Object confidence to filter predictions", default=0.5)
    parser.add_argument("--nms_thresh", dest="nms_thresh",
                        help="NMS threshold", default=0.4)
    parser.add_argument("--cfg", dest='cfgfile', help="Config file",
                        default="cfg/yolov3.cfg", type=str)
    parser.add_argument("--weights", dest='weightsfile', help="Weights file",
                        default="yolov3.weights", type=str)
    parser.add_argument("--reso", dest='reso',
                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="416", type=str)
    parser.add_argument("--scales", dest="scales", help="Scales to use for detection",
                        default="1,2,3", type=str)
    return parser.parse_args()
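
# Example invocation (hypothetical file name; assumes the official YOLO v3
# cfg and weights files are present at the default paths above):
#   python yolo_clipper.py --images imgs --det det --reso 416 --confidence 0.5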
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    try:
        # If nothing survives the confidence threshold this can raise, in
        # which case we signal "no detections" by returning the int 0.
        ind_nz = torch.nonzero(prediction[:, :, 4]).transpose(0, 1).contiguous()
    except Exception:
        return 0

    # Convert (center x, center y, w, h) to corner coordinates (x1, y1, x2, y2)
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    output = prediction.new(1, prediction.size(2) + 1)
    write = False

    for ind in range(batch_size):
        # Select the image from the batch
        image_pred = prediction[ind]

        # Replace the num_classes class scores with two columns: the maximum
        # class score and the index of the class that achieves it
        max_conf, max_conf_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_idx = max_conf_idx.float().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_idx)
        image_pred = torch.cat(seq, 1)

        # Get rid of the zero entries
        non_zero_ind = (torch.nonzero(image_pred[:, 4]))
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Get the various classes detected in the image
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # We will do NMS class-wise
        for cls in img_classes:
            # Get the detections of one particular class
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort the detections such that the entry with the maximum
            # objectness confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            # If NMS has to be done
            if nms:
                # For each detection
                for i in range(idx):
                    # Get the IoUs of all boxes that come after the one we
                    # are looking at in the loop
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
                    except ValueError:
                        break
                    except IndexError:
                        break

                    # Zero out all the detections that have IoU > threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Remove the zeroed-out entries
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # Concatenate the batch_id of the image to the detection.
            # This helps us identify which image each detection corresponds to.
            # We use a flat structure to hold ALL the detections from the
            # batch: the batch dim is flattened and each batch element is
            # identified by the extra batch column.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            seq = batch_ind, image_pred_class

            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))

    return output
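
# Layout of the tensor returned by write_results (one row per detection):
#   [batch_idx, x1, y1, x2, y2, objectness, max_class_score, class_idx]
# When nothing survives thresholding it returns the int 0, which is why
# callers below check `type(prediction) == int`.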
def predict_func(model, xs):
    # Imports live inside the function so they are captured when Clipper
    # serializes this closure and ships it to the model container.
    import urllib.request
    import json

    def letterbox_image(img, inp_dim):
        '''resize image with unchanged aspect ratio using padding'''
        img_w, img_h = img.shape[1], img.shape[0]
        w, h = inp_dim
        new_w = int(img_w * min(w / img_w, h / img_h))
        new_h = int(img_h * min(w / img_w, h / img_h))
        resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

        canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
        canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image

        return canvas
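
    # Worked example of the letterbox math: for a 640x480 input and inp_dim
    # (416, 416), the scale factor is min(416/640, 416/480) = 0.65, so the
    # image is resized to 416x312 and pasted onto a grey 416x416 canvas with
    # 52 pixels of padding above and below.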
    def prep_image(img, inp_dim):
        """
        Prepare image for inputting to the neural network.
        Returns a Variable
        """
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # BGR -> RGB, HWC -> CHW
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        print(img_.shape, dim)
        return img_, orig_im, dim
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    inp_dim = 416

    def rescale(array):
        # `dim` is the (width, height) of the current image, captured from the
        # enclosing loop below; this undoes the letterbox transform. Note it
        # derives the scaling factor from the width alone, which assumes the
        # width is the longer side (true for the sample frames); the main
        # script below takes the min over both dimensions instead.
        scaling_factor = inp_dim / dim[0]
        array[0] -= (inp_dim - scaling_factor * dim[0]) / 2
        array[2] -= (inp_dim - scaling_factor * dim[0]) / 2
        array[1] -= (inp_dim - scaling_factor * dim[1]) / 2
        array[3] -= (inp_dim - scaling_factor * dim[1]) / 2
        return array / scaling_factor
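
    # Worked example of the inverse transform: for a 1280x720 frame,
    # scaling_factor = 416/1280 = 0.325, the vertical padding term is
    # (416 - 0.325*720)/2 = 91 pixels, so y coordinates are shifted up by 91
    # and all coordinates are divided by 0.325 to land in original pixels.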
    print(xs)
    results = []
    for x in xs:
        try:
            resp = urllib.request.urlopen(x.decode())
        except Exception:  # Handle network errors
            results.append("404")
            continue
        image = np.asarray(bytearray(resp.read()), dtype="uint8")
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)

        inp_tensor, orig_im, dim = prep_image(image, inp_dim)
        inp_tensor = inp_tensor.cuda()
        #result = model(torch.autograd.Variable(inp_tensor), True)
        result = model(inp_tensor, True)
        prediction = write_results(result, 0.5, 80, nms=True, nms_conf=0.4)
        #result_np = prediction.cpu().numpy()
        result_np = prediction.cpu().detach().numpy()
        result_string = json.dumps([{'bbox': rescale(single_res[1:5]).tolist(), 'category': int(single_res[-1])} for single_res in result_np])
        results.append(result_string)
    return results
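
# Each element of `results` is a JSON string of the form
#   [{"bbox": [x1, y1, x2, y2], "category": <COCO class index>}, ...]
# in original-image pixel coordinates, or the string "404" if the URL fetch
# failed.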
if __name__ == '__main__':
    args = arg_parse()

    scales = args.scales
    # scales = [int(x) for x in scales.split(',')]
    #
    # args.reso = int(args.reso)
    #
    # num_boxes = [args.reso//32, args.reso//16, args.reso//8]
    # scale_indices = [3*(x**2) for x in num_boxes]
    # scale_indices = list(itertools.accumulate(scale_indices, lambda x,y : x+y))
    #
    # li = []
    # i = 0
    # for scale in scale_indices:
    #     li.extend(list(range(i, scale)))
    #     i = scale
    #
    # scale_indices = li

    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80
    classes = load_classes('data/coco.names')

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    print("Loading to CUDA")
    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()
    print("Model loaded to CUDA")
    xs = ['https://s3-us-west-2.amazonaws.com/scalabel-public/demo/frames/intersection-0000051.jpg']

    import time
    print("start predict")
    start = time.time()
    #print(predict_func(model, xs))
    print("time", time.time() - start)

    import sys
    #sys.exit(0)

    from clipper_admin import ClipperConnection, DockerContainerManager
    from clipper_admin.deployers.pytorch import deploy_pytorch_model

    clipper_conn = ClipperConnection(DockerContainerManager())
    clipper_conn.connect()

    deploy_pytorch_model(
        clipper_conn=clipper_conn,
        name='yolo',
        version=1,
        input_type='strings',
        func=predict_func,
        pytorch_model=model,
        pkgs_to_install=['numpy', 'opencv-python==3.2.0.8', 'pillow', 'pandas'])

    clipper_conn.link_model_to_app(app_name='scalabel', model_name='yolo')
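
    # Once deployed and linked, the model can be queried through Clipper's
    # REST frontend. A minimal sketch, assuming the default query port (1337)
    # and that the 'scalabel' application already exists:
    #   curl -X POST http://localhost:1337/scalabel/predict \
    #        -H "Content-Type: application/json" \
    #        -d '{"input": "https://s3-us-west-2.amazonaws.com/scalabel-public/demo/frames/intersection-0000051.jpg"}'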
    import sys
    sys.exit(0)
    read_dir = time.time()

    # Detection phase (unreachable after the sys.exit(0) above; kept from the
    # original detect.py flow)
    try:
        imlist = [osp.join(osp.realpath('.'), images, img)
                  for img in os.listdir(images)
                  if os.path.splitext(img)[1] in ('.png', '.jpeg', '.jpg')]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()

    if not os.path.exists(args.det):
        os.makedirs(args.det)

    load_batch = time.time()

    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
    im_batches = [x[0] for x in batches]
    orig_ims = [x[1] for x in batches]
    im_dim_list = [x[2] for x in batches]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1

    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [torch.cat((im_batches[i * batch_size: min((i + 1) * batch_size,
                                 len(im_batches))])) for i in range(num_batches)]

    i = 0
    write = False

    # Warm-up pass through the network
    model(get_test_input(inp_dim, CUDA), CUDA)

    start_det_loop = time.time()
    objs = {}

    for batch in im_batches:
        # Load the image
        start = time.time()
        if CUDA:
            batch = batch.cuda()

        # Apply offsets to the result predictions.
        # Transform the predictions as described in the YOLO paper.
        # Flatten the prediction vector:
        # B x (bbox coord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
        # Put every proposed box as a row.
        with torch.no_grad():
            prediction = model(Variable(batch), CUDA)

        # prediction = prediction[:,scale_indices]

        # Get the boxes with object confidence > threshold,
        # convert the coordinates to absolute coordinates,
        # perform NMS on these boxes, and save the results.
        # I could have done NMS and saving separately to have a better
        # abstraction, but both these operations require looping, hence
        # clubbing these ops in one loop instead of two:
        # loops are slower than vectorised operations.
        prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thresh)

        if type(prediction) == int:
            i += 1
            continue

        end = time.time()

        # print(end - start)

        prediction[:, 0] += i * batch_size

        if not write:
            output = prediction
            write = True
        else:
            output = torch.cat((output, prediction))

        for im_num, image in enumerate(imlist[i * batch_size: min((i + 1) * batch_size, len(imlist))]):
            im_id = i * batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start) / batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")

        i += 1

        if CUDA:
            torch.cuda.synchronize()

    try:
        output
    except NameError:
        print("No detections were made")
        exit()
    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())

    # Undo the letterbox transform: subtract the padding offsets, then divide
    # by the scaling factor to map boxes back to original-image pixels
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    # Clamp boxes to the image boundaries
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

    output_recast = time.time()

    class_load = time.time()

    colors = pkl.load(open("pallete", "rb"))

    draw = time.time()

    # Note: this `write` function shadows the `write` flag used in the
    # detection loop above; by this point the flag is no longer needed.
    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, im_batches, orig_ims), output))

    det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det, x.split("/")[-1]))

    list(map(cv2.imwrite, det_names, orig_ims))

    end = time.time()

    print()
    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) + " images)", output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch) / len(imlist)))
    print("----------------------------------------------------------")

    torch.cuda.empty_cache()