Created
March 22, 2019 12:02
-
-
Save pondahai/558e30bd70330a935a90967dca115644 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -------------------------------------------------------- | |
# Camera Caffe sample code for Tegra X2/X1 | |
# | |
# This program captures and displays video from IP CAM, | |
# USB webcam, or the Tegra onboard camera, and do real-time | |
# image classification (inference) with Caffe. Refer to the | |
# following blog post for how to set up and run the code: | |
# | |
# https://jkjung-avt.github.io/tx2-camera-caffe/ | |
# | |
# Written by JK Jung <[email protected]> | |
# Modify by dahai <[email protected]> | |
# -------------------------------------------------------- | |
import os | |
import sys | |
import argparse | |
import cv2 | |
import numpy as np | |
import caffe | |
from caffe.proto import caffe_pb2 | |
DEFAULT_PROTOTXT = "models/bvlc_reference_caffenet/deploy.prototxt" | |
DEFAULT_MODEL = "models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel" | |
DEFAULT_LABELS = "data/ilsvrc12/synset_words.txt" | |
DEFAULT_MEAN = "data/ilsvrc12/imagenet_mean.binaryproto" | |
windowName = "CameraCaffeDemo" | |
helpText = "'Esc' to Quit, 'H' to Toggle Help, 'F' to Toggle Fullscreen" | |
def parse_args(): | |
""" | |
Parse input arguments | |
""" | |
parser = argparse.ArgumentParser(description="Capture and display live camera video, and do real-time image classification with Caffe on Jetson TX2/TX1") | |
parser.add_argument("--vid", dest="video_dev", | |
help="video device # of USB webcam (/dev/video?) [1]", | |
default=0, type=int) | |
parser.add_argument("--width", dest="image_width", | |
help="image width [640]", | |
default=640, type=int) | |
parser.add_argument("--height", dest="image_height", | |
help="image height [480]", | |
default=480, type=int) | |
parser.add_argument("--cpu", dest="cpu_mode", | |
help="use CPU mode for Caffe (GPU mode is used by default)", | |
action="store_true") | |
parser.add_argument("--crop", dest="crop_center", | |
help="crop the square at center of image for Caffe inferencing [False]", | |
action="store_true") | |
parser.add_argument("--prototxt", dest="caffe_prototxt", | |
help="[{}]".format(DEFAULT_PROTOTXT), | |
default=DEFAULT_PROTOTXT, type=str) | |
parser.add_argument("--model", dest="caffe_model", | |
help="[{}]".format(DEFAULT_MODEL), | |
default=DEFAULT_MODEL, type=str) | |
parser.add_argument("--labels", dest="caffe_labels", | |
help="[{}]".format(DEFAULT_LABELS), | |
default=DEFAULT_LABELS, type=str) | |
parser.add_argument("--mean", dest="caffe_mean", | |
help="[{}]".format(DEFAULT_MEAN), | |
default=DEFAULT_MEAN, type=str) | |
parser.add_argument("--output", dest="caffe_output", | |
help='name of Caffe output blob [prob]', | |
default="prob", type=str) | |
args = parser.parse_args() | |
return args | |
def get_caffe_mean(filename): | |
mean_blob = caffe_pb2.BlobProto() | |
with open(filename, "rb") as f: | |
mean_blob.ParseFromString(f.read()) | |
mean_array = np.asarray(mean_blob.data, dtype=np.float32).reshape( | |
(mean_blob.channels, mean_blob.height, mean_blob.width)) | |
return mean_array.mean(1).mean(1) | |
def open_cam_usb(dev, width, height): | |
# We want to set width and height here, otherwise we could just do: | |
# return cv2.VideoCapture(dev) | |
return cv2.VideoCapture(dev) | |
def open_window(windowName, width, height): | |
cv2.namedWindow(windowName, cv2.WINDOW_NORMAL) | |
cv2.resizeWindow(windowName, width, height) | |
cv2.moveWindow(windowName, 0, 0) | |
cv2.setWindowTitle(windowName, "Camera Caffe Classification Demo for Jetson TX2/TX1") | |
def show_top_preds(img, top_probs, top_labels): | |
x = 10 | |
y = 40 | |
font = cv2.FONT_HERSHEY_PLAIN | |
for i in range(len(top_probs)): | |
pred = "{:.4f} {:20s}".format(top_probs[i], top_labels[i]) | |
#cv2.putText(img, pred, | |
# (x+1,y), font, 1.0, (32,32,32), 4, cv2.LINE_AA) | |
cv2.putText(img, pred, | |
(x,y), font, 1.0, (0,0,240), 1, cv2.LINE_AA) | |
y += 20 | |
def read_cam_and_classify(windowName, cap, net, transformer, labels, caffe_output, crop): | |
showHelp = True | |
showFullScreen = False | |
font = cv2.FONT_HERSHEY_PLAIN | |
while True: | |
if cv2.getWindowProperty(windowName, 0) < 0: # Check to see if the user closed the window | |
# This will fail if the user closed the window; Nasties get printed to the console | |
break; | |
ret_val, img = cap.read() | |
if crop: | |
height, width, channels = img.shape | |
if height < width: | |
img_crop = img[:, ((width-height)//2):((width+height)//2), :] | |
else: | |
img_crop = img[((height-width)//2):((height+width)//2), :, :] | |
else: | |
img_crop = img; | |
# inferencing the image | |
net.blobs["data"].data[...] = transformer.preprocess("data", img_crop) | |
output = net.forward() | |
output_prob = output[caffe_output][0] # output["prob"][0] | |
top_inds = output_prob.argsort()[::-1][:3] | |
top_probs = output_prob[top_inds] | |
top_labels = labels[top_inds] | |
show_top_preds(img, top_probs, top_labels) | |
if showHelp == True: | |
cv2.putText(img, helpText, (11,20), font, 1.0, (32,32,32), 4, cv2.LINE_AA) | |
cv2.putText(img, helpText, (10,20), font, 1.0, (240,240,240), 1, cv2.LINE_AA) | |
cv2.imshow(windowName, img) | |
key = cv2.waitKey(10) | |
if key == 27: # ESC key: quit program | |
break | |
elif key == ord('H') or key == ord('h'): # toggle help message | |
showHelp = not showHelp | |
elif key == ord('F') or key == ord('f'): # toggle fullscreen | |
showFullScreen = not showFullScreen | |
if showFullScreen == True: | |
cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN) | |
else: | |
cv2.setWindowProperty(windowName, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_NORMAL) | |
if __name__ == "__main__": | |
args = parse_args() | |
print("Called with args:") | |
print(args) | |
print("OpenCV version: {}".format(cv2.__version__)) | |
if not os.path.isfile(args.caffe_prototxt): | |
sys.exit("File not found: {}".format(args.caffe_prototxt)) | |
if not os.path.isfile(args.caffe_model): | |
sys.exit("File not found: {}".format(args.caffe_model)) | |
if not os.path.isfile(args.caffe_labels): | |
sys.exit("File not found: {}".format(args.caffe_labels)) | |
if not os.path.isfile(args.caffe_mean): | |
sys.exit("File not found: {}".format(args.caffe_mean)) | |
# initialize Caffe | |
if args.cpu_mode: | |
print("Running Caffe in CPU mode") | |
caffe.set_mode_cpu() | |
else: | |
print("Running Caffe in GPU mode") | |
caffe.set_device(0) | |
caffe.set_mode_gpu() | |
net = caffe.Net(args.caffe_prototxt, args.caffe_model, caffe.TEST) | |
mu = get_caffe_mean(args.caffe_mean) | |
print("Mean-subtracted values:", zip('BGR', mu)) | |
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape}) | |
transformer.set_transpose("data", (2,0,1)) | |
transformer.set_mean("data", mu) | |
# no need to to swap color channels since captured images are already BGR | |
labels = np.loadtxt(args.caffe_labels, str, delimiter='\t') | |
# initialize camera | |
cap = open_cam_usb(args.video_dev, args.image_width, args.image_height) | |
if not cap.isOpened(): | |
sys.exit("Failed to open camera!") | |
# start capturing live video and do inference | |
open_window(windowName, args.image_width, args.image_height) | |
read_cam_and_classify(windowName, cap, net, transformer, labels, | |
args.caffe_output, args.crop_center) | |
cap.release() | |
cv2.destroyAllWindows() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment