from __future__ import division
import time
import torch
import torch.nn as nn
from torch.autograd import Variable
import numpy as np
import cv2
from util import *
import argparse
import os
import os.path as osp
from darknet import Darknet
from preprocess import prep_image, inp_to_image
import pandas as pd
import random
import pickle as pkl
import itertools
class test_net(nn.Module):
    def __init__(self, num_layers, input_size):
        super(test_net, self).__init__()
        self.num_layers = num_layers
        self.linear_1 = nn.Linear(input_size, 5)
        self.middle = nn.ModuleList([nn.Linear(5, 5) for x in range(num_layers)])
        self.output = nn.Linear(5, 2)

    def forward(self, x):
        x = x.view(-1)
        fwd = nn.Sequential(self.linear_1, *self.middle, self.output)
        return fwd(x)
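
# Note: test_net above appears to be a small scratch module; it is never
# instantiated or referenced anywhere else in this script.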
def get_test_input(input_dim, CUDA):
    img = cv2.imread("dog-cycle-car.png")
    img = cv2.resize(img, (input_dim, input_dim))
    img_ = img[:, :, ::-1].transpose((2, 0, 1))  # BGR -> RGB, HWC -> CHW
    img_ = img_[np.newaxis, :, :, :] / 255.0
    img_ = torch.from_numpy(img_).float()
    img_ = Variable(img_)
    if CUDA:
        img_ = img_.cuda()
    return img_
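
# get_test_input appears to serve as a warm-up/sanity pass before the timed
# detection loop further below; it assumes a test image named
# "dog-cycle-car.png" exists in the working directory.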
def arg_parse():
    """
    Parse arguments to the detect module
    """
    parser = argparse.ArgumentParser(description='YOLO v3 Detection Module')
    parser.add_argument("--images", dest='images',
                        help="Image / Directory containing images to perform detection upon",
                        default="imgs", type=str)
    parser.add_argument("--det", dest='det',
                        help="Image / Directory to store detections to",
                        default="det", type=str)
    parser.add_argument("--bs", dest="bs", help="Batch size", default=1)
    parser.add_argument("--confidence", dest="confidence",
                        help="Object confidence to filter predictions", default=0.5)
    parser.add_argument("--nms_thresh", dest="nms_thresh",
                        help="NMS threshold", default=0.4)
    parser.add_argument("--cfg", dest='cfgfile', help="Config file",
                        default="cfg/yolov3.cfg", type=str)
    parser.add_argument("--weights", dest='weightsfile', help="Weights file",
                        default="yolov3.weights", type=str)
    parser.add_argument("--reso", dest='reso',
                        help="Input resolution of the network. Increase to increase accuracy. Decrease to increase speed",
                        default="416", type=str)
    parser.add_argument("--scales", dest="scales", help="Scales to use for detection",
                        default="1,2,3", type=str)
    return parser.parse_args()
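
# Example invocation (hypothetical file name; assumes the official YOLO v3
# cfg and weights files are present at the default paths above):
#   python yolo_clipper.py --images imgs --det det --reso 416 --confidence 0.5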
def write_results(prediction, confidence, num_classes, nms=True, nms_conf=0.4):
    conf_mask = (prediction[:, :, 4] > confidence).float().unsqueeze(2)
    prediction = prediction * conf_mask

    try:
        # If nothing survives the confidence threshold this can raise, in
        # which case we signal "no detections" by returning the int 0.
        ind_nz = torch.nonzero(prediction[:, :, 4]).transpose(0, 1).contiguous()
    except Exception:
        return 0

    # Convert (center x, center y, w, h) to corner coordinates (x1, y1, x2, y2)
    box_a = prediction.new(prediction.shape)
    box_a[:, :, 0] = (prediction[:, :, 0] - prediction[:, :, 2] / 2)
    box_a[:, :, 1] = (prediction[:, :, 1] - prediction[:, :, 3] / 2)
    box_a[:, :, 2] = (prediction[:, :, 0] + prediction[:, :, 2] / 2)
    box_a[:, :, 3] = (prediction[:, :, 1] + prediction[:, :, 3] / 2)
    prediction[:, :, :4] = box_a[:, :, :4]

    batch_size = prediction.size(0)

    output = prediction.new(1, prediction.size(2) + 1)
    write = False

    for ind in range(batch_size):
        # Select the image from the batch
        image_pred = prediction[ind]

        # Replace the num_classes class scores with two columns: the maximum
        # class score and the index of the class that achieves it
        max_conf, max_conf_idx = torch.max(image_pred[:, 5:5 + num_classes], 1)
        max_conf = max_conf.float().unsqueeze(1)
        max_conf_idx = max_conf_idx.float().unsqueeze(1)
        seq = (image_pred[:, :5], max_conf, max_conf_idx)
        image_pred = torch.cat(seq, 1)

        # Get rid of the zero entries
        non_zero_ind = (torch.nonzero(image_pred[:, 4]))
        image_pred_ = image_pred[non_zero_ind.squeeze(), :].view(-1, 7)

        # Get the various classes detected in the image
        try:
            img_classes = unique(image_pred_[:, -1])
        except Exception:
            continue

        # We will do NMS class-wise
        for cls in img_classes:
            # Get the detections of one particular class
            cls_mask = image_pred_ * (image_pred_[:, -1] == cls).float().unsqueeze(1)
            class_mask_ind = torch.nonzero(cls_mask[:, -2]).squeeze()
            image_pred_class = image_pred_[class_mask_ind].view(-1, 7)

            # Sort the detections such that the entry with the maximum
            # objectness confidence is at the top
            conf_sort_index = torch.sort(image_pred_class[:, 4], descending=True)[1]
            image_pred_class = image_pred_class[conf_sort_index]
            idx = image_pred_class.size(0)

            # If NMS has to be done
            if nms:
                # For each detection
                for i in range(idx):
                    # Get the IoUs of all boxes that come after the one we
                    # are looking at in the loop
                    try:
                        ious = bbox_iou(image_pred_class[i].unsqueeze(0), image_pred_class[i + 1:])
                    except ValueError:
                        break
                    except IndexError:
                        break

                    # Zero out all the detections that have IoU > threshold
                    iou_mask = (ious < nms_conf).float().unsqueeze(1)
                    image_pred_class[i + 1:] *= iou_mask

                    # Remove the zeroed-out entries
                    non_zero_ind = torch.nonzero(image_pred_class[:, 4]).squeeze()
                    image_pred_class = image_pred_class[non_zero_ind].view(-1, 7)

            # Concatenate the batch_id of the image to the detection.
            # This helps us identify which image each detection corresponds to.
            # We use a flat structure to hold ALL the detections from the
            # batch: the batch dim is flattened and each batch element is
            # identified by the extra batch column.
            batch_ind = image_pred_class.new(image_pred_class.size(0), 1).fill_(ind)
            seq = batch_ind, image_pred_class

            if not write:
                output = torch.cat(seq, 1)
                write = True
            else:
                out = torch.cat(seq, 1)
                output = torch.cat((output, out))

    return output
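
# Layout of the tensor returned by write_results (one row per detection):
#   [batch_idx, x1, y1, x2, y2, objectness, max_class_score, class_idx]
# When nothing survives thresholding it returns the int 0, which is why
# callers below check `type(prediction) == int`.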
def predict_func(model, xs):
    # Imports live inside the function so they are captured when Clipper
    # serializes this closure and ships it to the model container.
    import urllib.request
    import json

    def letterbox_image(img, inp_dim):
        '''resize image with unchanged aspect ratio using padding'''
        img_w, img_h = img.shape[1], img.shape[0]
        w, h = inp_dim
        new_w = int(img_w * min(w / img_w, h / img_h))
        new_h = int(img_h * min(w / img_w, h / img_h))
        resized_image = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)

        canvas = np.full((inp_dim[1], inp_dim[0], 3), 128)
        canvas[(h - new_h) // 2:(h - new_h) // 2 + new_h, (w - new_w) // 2:(w - new_w) // 2 + new_w, :] = resized_image

        return canvas
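
    # Worked example of the letterbox math: for a 640x480 input and inp_dim
    # (416, 416), the scale factor is min(416/640, 416/480) = 0.65, so the
    # image is resized to 416x312 and pasted onto a grey 416x416 canvas with
    # 52 pixels of padding above and below.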
    def prep_image(img, inp_dim):
        """
        Prepare image for inputting to the neural network.
        Returns a Variable
        """
        orig_im = img
        dim = orig_im.shape[1], orig_im.shape[0]
        img = (letterbox_image(orig_im, (inp_dim, inp_dim)))
        img_ = img[:, :, ::-1].transpose((2, 0, 1)).copy()  # BGR -> RGB, HWC -> CHW
        img_ = torch.from_numpy(img_).float().div(255.0).unsqueeze(0)
        print(img_.shape, dim)
        return img_, orig_im, dim
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    inp_dim = 416

    def rescale(array):
        # `dim` is the (width, height) of the current image, captured from the
        # enclosing loop below; this undoes the letterbox transform. Note it
        # derives the scaling factor from the width alone, which assumes the
        # width is the longer side (true for the sample frames); the main
        # script below takes the min over both dimensions instead.
        scaling_factor = inp_dim / dim[0]
        array[0] -= (inp_dim - scaling_factor * dim[0]) / 2
        array[2] -= (inp_dim - scaling_factor * dim[0]) / 2
        array[1] -= (inp_dim - scaling_factor * dim[1]) / 2
        array[3] -= (inp_dim - scaling_factor * dim[1]) / 2
        return array / scaling_factor
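
    # Worked example of the inverse transform: for a 1280x720 frame,
    # scaling_factor = 416/1280 = 0.325, the vertical padding term is
    # (416 - 0.325*720)/2 = 91 pixels, so y coordinates are shifted up by 91
    # and all coordinates are divided by 0.325 to land in original pixels.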
    print(xs)
    results = []
    for x in xs:
        try:
            resp = urllib.request.urlopen(x.decode())
        except Exception:  # Handle network errors
            results.append("404")
            continue
        image = np.asarray(bytearray(resp.read()), dtype="uint8")
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)

        inp_tensor, orig_im, dim = prep_image(image, inp_dim)
        inp_tensor = inp_tensor.cuda()
        #result = model(torch.autograd.Variable(inp_tensor), True)
        result = model(inp_tensor, True)
        prediction = write_results(result, 0.5, 80, nms=True, nms_conf=0.4)
        #result_np = prediction.cpu().numpy()
        result_np = prediction.cpu().detach().numpy()
        result_string = json.dumps([{'bbox': rescale(single_res[1:5]).tolist(), 'category': int(single_res[-1])} for single_res in result_np])
        results.append(result_string)
    return results
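
# Each element of `results` is a JSON string of the form
#   [{"bbox": [x1, y1, x2, y2], "category": <COCO class index>}, ...]
# in original-image pixel coordinates, or the string "404" if the URL fetch
# failed.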
if __name__ == '__main__':
    args = arg_parse()

    scales = args.scales
    # scales = [int(x) for x in scales.split(',')]
    #
    # args.reso = int(args.reso)
    #
    # num_boxes = [args.reso//32, args.reso//16, args.reso//8]
    # scale_indices = [3*(x**2) for x in num_boxes]
    # scale_indices = list(itertools.accumulate(scale_indices, lambda x,y : x+y))
    #
    # li = []
    # i = 0
    # for scale in scale_indices:
    #     li.extend(list(range(i, scale)))
    #     i = scale
    #
    # scale_indices = li

    images = args.images
    batch_size = int(args.bs)
    confidence = float(args.confidence)
    nms_thresh = float(args.nms_thresh)
    start = 0

    CUDA = torch.cuda.is_available()

    num_classes = 80
    classes = load_classes('data/coco.names')

    # Set up the neural network
    print("Loading network.....")
    model = Darknet(args.cfgfile)
    model.load_weights(args.weightsfile)
    print("Network successfully loaded")

    model.net_info["height"] = args.reso
    inp_dim = int(model.net_info["height"])
    assert inp_dim % 32 == 0
    assert inp_dim > 32

    print("Loading to CUDA")
    # If there's a GPU available, put the model on GPU
    if CUDA:
        model.cuda()

    # Set the model in evaluation mode
    model.eval()
    print("Model loaded to CUDA")
    xs = ['https://s3-us-west-2.amazonaws.com/scalabel-public/demo/frames/intersection-0000051.jpg']

    import time
    print("start predict")
    start = time.time()
    #print(predict_func(model, xs))
    print("time", time.time() - start)

    import sys
    #sys.exit(0)

    from clipper_admin import ClipperConnection, DockerContainerManager
    from clipper_admin.deployers.pytorch import deploy_pytorch_model

    clipper_conn = ClipperConnection(DockerContainerManager())
    clipper_conn.connect()

    deploy_pytorch_model(
        clipper_conn=clipper_conn,
        name='yolo',
        version=1,
        input_type='strings',
        func=predict_func,
        pytorch_model=model,
        pkgs_to_install=['numpy', 'opencv-python==3.2.0.8', 'pillow', 'pandas'])

    clipper_conn.link_model_to_app(app_name='scalabel', model_name='yolo')
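
    # Once deployed and linked, the model can be queried through Clipper's
    # REST frontend. A minimal sketch, assuming the default query port (1337)
    # and that the 'scalabel' application already exists:
    #   curl -X POST http://localhost:1337/scalabel/predict \
    #        -H "Content-Type: application/json" \
    #        -d '{"input": "https://s3-us-west-2.amazonaws.com/scalabel-public/demo/frames/intersection-0000051.jpg"}'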
    import sys
    sys.exit(0)
    read_dir = time.time()

    # Detection phase (unreachable after the sys.exit(0) above; kept from the
    # original detect.py flow)
    try:
        imlist = [osp.join(osp.realpath('.'), images, img)
                  for img in os.listdir(images)
                  if os.path.splitext(img)[1] in ('.png', '.jpeg', '.jpg')]
    except NotADirectoryError:
        imlist = []
        imlist.append(osp.join(osp.realpath('.'), images))
    except FileNotFoundError:
        print("No file or directory with the name {}".format(images))
        exit()

    if not os.path.exists(args.det):
        os.makedirs(args.det)

    load_batch = time.time()

    batches = list(map(prep_image, imlist, [inp_dim for x in range(len(imlist))]))
    im_batches = [x[0] for x in batches]
    orig_ims = [x[1] for x in batches]
    im_dim_list = [x[2] for x in batches]
    im_dim_list = torch.FloatTensor(im_dim_list).repeat(1, 2)

    if CUDA:
        im_dim_list = im_dim_list.cuda()

    leftover = 0
    if (len(im_dim_list) % batch_size):
        leftover = 1

    if batch_size != 1:
        num_batches = len(imlist) // batch_size + leftover
        im_batches = [torch.cat((im_batches[i * batch_size: min((i + 1) * batch_size,
                                 len(im_batches))])) for i in range(num_batches)]

    i = 0
    write = False

    # Warm-up pass through the network
    model(get_test_input(inp_dim, CUDA), CUDA)

    start_det_loop = time.time()
    objs = {}

    for batch in im_batches:
        # Load the image
        start = time.time()
        if CUDA:
            batch = batch.cuda()

        # Apply offsets to the result predictions.
        # Transform the predictions as described in the YOLO paper.
        # Flatten the prediction vector:
        # B x (bbox coord x no. of anchors) x grid_w x grid_h --> B x bbox x (all the boxes)
        # Put every proposed box as a row.
        with torch.no_grad():
            prediction = model(Variable(batch), CUDA)

        # prediction = prediction[:,scale_indices]

        # Get the boxes with object confidence > threshold,
        # convert the coordinates to absolute coordinates,
        # perform NMS on these boxes, and save the results.
        # I could have done NMS and saving separately to have a better
        # abstraction, but both these operations require looping, hence
        # clubbing these ops in one loop instead of two:
        # loops are slower than vectorised operations.
        prediction = write_results(prediction, confidence, num_classes, nms=True, nms_conf=nms_thresh)

        if type(prediction) == int:
            i += 1
            continue

        end = time.time()

        # print(end - start)

        prediction[:, 0] += i * batch_size

        if not write:
            output = prediction
            write = True
        else:
            output = torch.cat((output, prediction))

        for im_num, image in enumerate(imlist[i * batch_size: min((i + 1) * batch_size, len(imlist))]):
            im_id = i * batch_size + im_num
            objs = [classes[int(x[-1])] for x in output if int(x[0]) == im_id]
            print("{0:20s} predicted in {1:6.3f} seconds".format(image.split("/")[-1], (end - start) / batch_size))
            print("{0:20s} {1:s}".format("Objects Detected:", " ".join(objs)))
            print("----------------------------------------------------------")

        i += 1

        if CUDA:
            torch.cuda.synchronize()

    try:
        output
    except NameError:
        print("No detections were made")
        exit()
    im_dim_list = torch.index_select(im_dim_list, 0, output[:, 0].long())

    # Undo the letterbox transform: subtract the padding offsets, then divide
    # by the scaling factor to map boxes back to original-image pixels
    scaling_factor = torch.min(inp_dim / im_dim_list, 1)[0].view(-1, 1)
    output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim_list[:, 0].view(-1, 1)) / 2
    output[:, [2, 4]] -= (inp_dim - scaling_factor * im_dim_list[:, 1].view(-1, 1)) / 2
    output[:, 1:5] /= scaling_factor

    # Clamp boxes to the image boundaries
    for i in range(output.shape[0]):
        output[i, [1, 3]] = torch.clamp(output[i, [1, 3]], 0.0, im_dim_list[i, 0])
        output[i, [2, 4]] = torch.clamp(output[i, [2, 4]], 0.0, im_dim_list[i, 1])

    output_recast = time.time()

    class_load = time.time()

    colors = pkl.load(open("pallete", "rb"))

    draw = time.time()

    # Note: this `write` function shadows the `write` flag used in the
    # detection loop above; by this point the flag is no longer needed.
    def write(x, batches, results):
        c1 = tuple(x[1:3].int())
        c2 = tuple(x[3:5].int())
        img = results[int(x[0])]
        cls = int(x[-1])
        label = "{0}".format(classes[cls])
        color = random.choice(colors)
        cv2.rectangle(img, c1, c2, color, 1)
        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
        c2 = c1[0] + t_size[0] + 3, c1[1] + t_size[1] + 4
        cv2.rectangle(img, c1, c2, color, -1)
        cv2.putText(img, label, (c1[0], c1[1] + t_size[1] + 4), cv2.FONT_HERSHEY_PLAIN, 1, [225, 255, 255], 1)
        return img

    list(map(lambda x: write(x, im_batches, orig_ims), output))

    det_names = pd.Series(imlist).apply(lambda x: "{}/det_{}".format(args.det, x.split("/")[-1]))

    list(map(cv2.imwrite, det_names, orig_ims))

    end = time.time()

    print()
    print("SUMMARY")
    print("----------------------------------------------------------")
    print("{:25s}: {}".format("Task", "Time Taken (in seconds)"))
    print()
    print("{:25s}: {:2.3f}".format("Reading addresses", load_batch - read_dir))
    print("{:25s}: {:2.3f}".format("Loading batch", start_det_loop - load_batch))
    print("{:25s}: {:2.3f}".format("Detection (" + str(len(imlist)) + " images)", output_recast - start_det_loop))
    print("{:25s}: {:2.3f}".format("Output Processing", class_load - output_recast))
    print("{:25s}: {:2.3f}".format("Drawing Boxes", end - draw))
    print("{:25s}: {:2.3f}".format("Average time_per_img", (end - load_batch) / len(imlist)))
    print("----------------------------------------------------------")

    torch.cuda.empty_cache()