DetectNet on a video input. Call: python CarDetection.py yourSnap.caffemodel deploy.prototxt input.mp4 output.mp4
# Based on
# https://gist.github.com/lukeyeager/777087991419d98700054cade2f755e6
#--------------------------------------------------------------------
# CarDetection runs DetectNet on a video pipeline (tested on a TX1)
# The two lines below only need to be run once, and only if ffmpeg is not installed
#import imageio
#imageio.plugins.ffmpeg.download()
import cv2
import numpy as np
import argparse
import os
import time
from google.protobuf import text_format
from moviepy.editor import VideoFileClip
import scipy.misc

os.environ['GLOG_minloglevel'] = '2'  # Suppress most caffe output
import caffe
from caffe.proto import caffe_pb2

def get_net(caffemodel, deploy_file, use_gpu=True):
    """
    Returns an instance of caffe.Net

    Arguments:
    caffemodel -- path to a .caffemodel file
    deploy_file -- path to a .prototxt file

    Keyword arguments:
    use_gpu -- if True, use the GPU for inference
    """
    if use_gpu:
        caffe.set_mode_gpu()

    # load a new model
    return caffe.Net(deploy_file, caffemodel, caffe.TEST)

def get_transformer(deploy_file, mean_file=None):
    """
    Returns an instance of caffe.io.Transformer

    Arguments:
    deploy_file -- path to a .prototxt file

    Keyword arguments:
    mean_file -- path to a .binaryproto file (optional)
    """
    network = caffe_pb2.NetParameter()
    with open(deploy_file) as infile:
        text_format.Merge(infile.read(), network)

    if network.input_shape:
        dims = network.input_shape[0].dim
    else:
        dims = network.input_dim[:4]

    t = caffe.io.Transformer(
        inputs={'data': dims}
    )
    t.set_transpose('data', (2, 0, 1))  # transpose to (channels, height, width)

    # color images
    if dims[1] == 3:
        # channel swap
        t.set_channel_swap('data', (2, 1, 0))

    if mean_file:
        # set mean pixel
        with open(mean_file, 'rb') as infile:
            blob = caffe_pb2.BlobProto()
            blob.MergeFromString(infile.read())
            if blob.HasField('shape'):
                # BlobShape stores its dimensions in the repeated 'dim' field
                blob_dims = blob.shape.dim
                assert len(blob_dims) == 4, 'Shape should have 4 dimensions - shape is "%s"' % blob.shape
            elif blob.HasField('num') and blob.HasField('channels') and \
                    blob.HasField('height') and blob.HasField('width'):
                blob_dims = (blob.num, blob.channels, blob.height, blob.width)
            else:
                raise ValueError('blob does not provide shape or 4d dimensions')
            pixel = np.reshape(blob.data, blob_dims[1:]).mean(1).mean(1)
            t.set_mean('data', pixel)

    return t

def resize_img(image, height, width):
    """
    Resizes the image to detectnet inputs

    Arguments:
    image -- a single image
    height -- height of the network input
    width -- width of the network input
    """
    image = np.array(image)
    image = scipy.misc.imresize(image, (height, width), 'bilinear')
    return image

def draw_bboxes(image, locations):
    """
    Draws the bounding boxes on an image

    Arguments:
    image -- a single image, already resized
    locations -- the locations of the bounding boxes
    """
    for left, top, right, bottom, confidence in locations:
        if confidence == 0:
            continue
        # cv2.rectangle expects integer pixel coordinates
        cv2.rectangle(image, (int(left), int(top)), (int(right), int(bottom)), (255, 0, 0), 3)
    #cv2.imwrite('bbox.png', image)  # test on a single image
    return image

def forward_pass(image, net, transformer, batch_size=None):
    """
    Returns the network output for the image as an np.ndarray

    Arguments:
    image -- a single image as an np.ndarray
    net -- a caffe.Net
    transformer -- a caffe.io.Transformer

    Keyword arguments:
    batch_size -- how many images can be processed at once
                  (a high value may result in out-of-memory errors)
    """
    if batch_size is None:
        batch_size = 1

    caffe_images = []
    if image.ndim == 2:
        caffe_images.append(image[:, :, np.newaxis])
    else:
        caffe_images.append(image)

    dims = transformer.inputs['data'][1:]

    scores = None
    for chunk in [caffe_images[x:x + batch_size] for x in xrange(0, len(caffe_images), batch_size)]:
        new_shape = (len(chunk),) + tuple(dims)
        if net.blobs['data'].data.shape != new_shape:
            net.blobs['data'].reshape(*new_shape)
        for index, image in enumerate(chunk):
            image_data = transformer.preprocess('data', image)
            net.blobs['data'].data[index] = image_data
        start = time.time()
        output = net.forward()[net.outputs[-1]]
        end = time.time()
        if scores is None:
            scores = np.copy(output)
        else:
            scores = np.vstack((scores, output))
        print 'Processed %s/%s images in %f seconds ...' % (len(scores), len(caffe_images), (end - start))

    return scores

def classify(caffemodel, deploy_file, image,
             mean_file=None, batch_size=None, use_gpu=True):
    """
    Runs a single image through a Caffe DetectNet model and returns it with bounding boxes drawn

    Arguments:
    caffemodel -- path to a .caffemodel
    deploy_file -- path to a .prototxt
    image -- a single image as an np.ndarray

    Keyword arguments:
    mean_file -- path to a .binaryproto
    use_gpu -- if True, run inference on the GPU
    """
    # Load the model
    net = get_net(caffemodel, deploy_file, use_gpu)
    transformer = get_transformer(deploy_file, mean_file)
    _, channels, height, width = transformer.inputs['data']
    if channels == 3:
        mode = 'RGB'
    elif channels == 1:
        mode = 'L'
    else:
        raise ValueError('Invalid number of channels: %s' % channels)

    image = resize_img(image, height, width)

    # Classify the image
    scores = forward_pass(image, net, transformer, batch_size=batch_size)

    ### Process the results
    # Format of scores is [ batch_size x max_bbox_per_image x 5 (xl, yt, xr, yb, confidence) ]
    # https://github.com/NVIDIA/caffe/blob/v0.15.13/python/caffe/layers/detectnet/clustering.py#L81
    for i, image_results in enumerate(scores):
        #print '==> Image #%d' % i
        img_result = draw_bboxes(image, image_results)
        # This line is optional; it resizes back to the size of the original input video and can be removed
        #img_result = resize_img(img_result, 720, 1280)

    return img_result

def detect_car(image):
    """
    Runs our pipeline given a single image and returns another one with the bounding boxes drawn

    Arguments:
    image -- cv2 image file
    """
    result = classify(args['caffemodel'], args['deploy_file'], image,
                      args['mean'], args['batch_size'], not args['nogpu'])
    return result

if __name__ == '__main__':
    script_start_time = time.time()

    parser = argparse.ArgumentParser(description='DetectNet - DIGITS')

    ### Positional arguments
    parser.add_argument('caffemodel', help='Path to a .caffemodel')
    parser.add_argument('deploy_file', help='Path to the deploy file')
    parser.add_argument('video_file', help='Path to the input video')
    parser.add_argument('output_video_file', help='Path for the output video')

    ### Optional arguments
    parser.add_argument('-m', '--mean',
                        help='Path to a mean file (*.binaryproto)')
    parser.add_argument('--batch-size',
                        type=int)
    parser.add_argument('--nogpu',
                        action='store_true',
                        help="Don't use the GPU")

    # detect_car reads the parsed arguments as a module-level global
    args = vars(parser.parse_args())

    project_output = args['output_video_file']
    clip1 = VideoFileClip(args['video_file'])
    white_clip = clip1.fl_image(detect_car)
    white_clip.write_videofile(project_output, audio=False)

    print 'Video took %f seconds.' % (time.time() - script_start_time)
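
For a quick sanity check without rendering a whole video, the classify helper above can also be run on a single frame. The sketch below is illustrative only: it assumes this gist is saved as CarDetection.py and uses hypothetical file names (frame.jpg, frame_bboxes.png) alongside the yourSnap.caffemodel and deploy.prototxt placeholders from the description. Note that cv2.imread returns BGR, while moviepy feeds RGB frames to detect_car, so the frame is converted before inference.

# Single-frame sketch (assumed paths; adjust to your own snapshot, deploy file and image)
import cv2
from CarDetection import classify  # assumes this gist is saved as CarDetection.py

frame_bgr = cv2.imread('frame.jpg')                     # OpenCV loads images as BGR
frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)  # match the RGB frames moviepy provides

# Run the same DetectNet model used by the video pipeline on one frame
result = classify('yourSnap.caffemodel', 'deploy.prototxt', frame_rgb,
                  mean_file=None, batch_size=1, use_gpu=True)

# Convert back to BGR before writing with OpenCV
cv2.imwrite('frame_bboxes.png', cv2.cvtColor(result, cv2.COLOR_RGB2BGR))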