Created
November 15, 2016 09:02
-
-
Save ck196/e22fc3c633678185522f292b6ac506c3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division | |
import os | |
import math | |
import numpy as np | |
import json | |
from os import listdir | |
from os.path import isfile, join | |
#from nms.gpu_nms import gpu_nms | |
#import gpu_nms | |
import sys | |
import glob | |
import cv2 | |
import argparse | |
import re | |
import time | |
from scipy.misc import imread | |
sys.path.insert(0, "./python") | |
import caffe | |
from nms.gpu_nms import gpu_nms | |
# CUDA device index shared by caffe.set_device() and gpu_nms().
GPU_ID = 0
def filter_proposals(proposals, threshold=-10):
    """Return a boolean mask selecting the proposals worth keeping.

    Args:
        proposals: 2-D array whose rows are ``[x, y, width, height, score]``
            (the layout ``im_detect`` builds before calling this function).
        threshold: minimum score (last column) a proposal must reach.

    Returns:
        A boolean array with one entry per row; ``True`` when the score is
        at least ``threshold`` and the box is not degenerate, i.e. both its
        width and its height are non-zero.
    """
    proposals = np.asarray(proposals)
    scores = proposals[:, -1]
    # Width is column 2 and height is column 3.  Testing columns 3 and 4
    # instead would compare height and score, letting zero-width boxes pass.
    widths = proposals[:, 2]
    heights = proposals[:, 3]
    return (scores >= threshold) & (widths != 0) & (heights != 0)
def im_normalize(im, target_size, mu=(104, 117, 123)):
    """Resize an image and turn it into a mean-subtracted network input.

    Args:
        im: image array laid out as (height, width, channels), e.g. the
            result of ``cv2.imread``.
        target_size: size tuple handed to ``cv2.resize``
            (OpenCV expects ``(width, height)``).
        mu: per-channel mean subtracted from the resized image, given in the
            same channel order as ``im``.  A tuple is used as the default so
            no mutable object is shared between calls.

    Returns:
        A C-contiguous float32 array of shape
        ``(1, channels, height, width)``.
    """
    resized = cv2.resize(im, target_size).astype(np.float32)
    # Subtract the channel means (broadcast over the last axis).
    resized -= np.asarray(mu, dtype=np.float32)
    # (H, W, C) -> (C, H, W), then prepend a batch axis of length 1.
    chw = resized.transpose(2, 0, 1)
    return np.ascontiguousarray(chw[np.newaxis])
def bbox_denormalize(bbox_pred, proposals, ratios, orgW, orgH, do_bb_norm=True):
    """Decode regression deltas into ``[x, y, w, h]`` boxes in image space.

    Args:
        bbox_pred: ``(N, 4)`` array of deltas ``(dx, dy, dw, dh)``.  It is
            not modified.
        proposals: ``(N, >=4)`` array whose first four columns are
            ``x, y, width, height`` of each proposal.
        ratios: ``(x_ratio, y_ratio)``; decoded boxes are divided by these
            to map them back to the original image scale.
        orgW: original image width, used to clip box widths.
        orgH: original image height, used to clip box heights.
        do_bb_norm: when true, the deltas are first multiplied by the
            standard deviations ``(0.1, 0.1, 0.2, 0.2)`` and shifted by the
            (all-zero) means.

    Returns:
        An ``(N, 4)`` float32 array with rows ``[x, y, w, h]`` where
        ``x, y`` is the top-left corner, clipped to be non-negative, and
        ``w, h`` are limited so the box does not extend past
        ``orgW`` / ``orgH``.
    """
    bbox_means = [0, 0, 0, 0]
    bbox_stds = [0.1, 0.1, 0.2, 0.2]

    # Work on a private copy: scaling the caller's array in place would
    # corrupt it for any later use (e.g. calling this function twice).
    deltas = np.array(bbox_pred, dtype=np.float32)
    if do_bb_norm:
        deltas *= bbox_stds
        deltas += bbox_means

    prop_x, prop_y = proposals[:, 0], proposals[:, 1]
    prop_w, prop_h = proposals[:, 2], proposals[:, 3]
    ctr_x = prop_x + 0.5 * prop_w
    ctr_y = prop_y + 0.5 * prop_h

    # Size deltas are log-scale factors; position deltas are offsets of the
    # box centre expressed in units of the proposal size.
    tw = prop_w * np.exp(deltas[:, 2])
    th = prop_h * np.exp(deltas[:, 3])
    # Convert the predicted centre into a top-left corner.
    tx = deltas[:, 0] * prop_w + ctr_x - tw / 2
    ty = deltas[:, 1] * prop_h + ctr_y - th / 2

    # Rescale to the original image, then clip to the image borders.
    tx = np.maximum(tx / ratios[0], 0)
    ty = np.maximum(ty / ratios[1], 0)
    tw = np.minimum(tw / ratios[0], orgW - tx)
    th = np.minimum(th / ratios[1], orgH - ty)

    return np.column_stack((tx, ty, tw, th)).astype(np.float32)
def get_confidence(exp_score, cls_indx):
    """Return, per row, the share of ``exp_score`` held by one class.

    Args:
        exp_score: 2-D array of already-exponentiated class scores, one row
            per detection and one column per class.
        cls_indx: column index of the class of interest.

    Returns:
        1-D array: column ``cls_indx`` divided by each row's total.
    """
    row_totals = exp_score.sum(axis=1)
    return exp_score[:, cls_indx] / row_totals
# Class labels by index; index 0 is the background class, which the
# detection loops skip by iterating over CLASSES[1:].
CLASSES = (
    "_background",
    "pedestrian",
    "cyclist",
)
# Drawing colour per class index, passed to cv2.rectangle
# (presumably in OpenCV's BGR channel order -- confirm).
COLOR = (
    (0, 0, 0),
    (255, 0, 255),
    (255, 255, 0),
)
#mu is the mean of BGR | |
def im_detect(net, file_path, target_size=(2560, 768)):
    """Run the network on one image and decode per-class detections.

    Args:
        net: loaded ``caffe.Net`` with a ``data`` input blob and the outputs
            ``bbox_pred``, ``cls_pred`` and ``proposals_score``.
        file_path: path of the image to read with ``cv2.imread``.
        target_size: ``(width, height)`` the image is resized to before it
            is fed to the network.

    Returns:
        A list with one ``(boxes, confidence)`` pair per non-background
        entry of ``CLASSES`` (in that order).  ``boxes`` is an ``(N, 4)``
        array of ``[x, y, w, h]`` in original-image coordinates and
        ``confidence`` the matching ``(N,)`` class probabilities.

    Raises:
        IOError: if the image cannot be read.
    """
    im = cv2.imread(file_path)
    if im is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Could not read image: {}".format(file_path))
    orgH, orgW = im.shape[:2]
    ratios = (target_size[0] / orgW, target_size[1] / orgH)

    # Feed-forward.
    net.blobs['data'].data[...] = im_normalize(im, target_size)
    output = net.forward()
    bbox_pred = output['bbox_pred']
    cls_pred = output['cls_pred']

    # Drop the first of the six columns, leaving [x1, y1, x2, y2, score].
    # Copy first: the slice may be a view of memory owned by the network,
    # and the corner -> size conversion below is done in place.
    proposals = output['proposals_score'].reshape((-1, 6))[:, 1:].copy()
    proposals[:, 2] -= proposals[:, 0]  # x2 -> width
    proposals[:, 3] -= proposals[:, 1]  # y2 -> height

    keeps = filter_proposals(proposals)
    bbox_pred = bbox_pred[keeps]
    cls_pred = cls_pred[keeps]
    proposals = proposals[keeps]

    # Shift each row by its maximum before exponentiating.  The normalised
    # confidences are mathematically unchanged, but large scores can no
    # longer overflow to inf and turn into NaN after the division.
    exp_scores = np.exp(cls_pred - cls_pred.max(axis=1, keepdims=True))

    results = []
    # Class 0 is the background and is skipped.
    for cls_indx in range(1, len(CLASSES)):
        # Each class owns four consecutive regression columns.
        cls_boxes = bbox_pred[:, 4 * cls_indx:4 * (cls_indx + 1)]
        boxes = bbox_denormalize(cls_boxes, proposals, ratios, orgW, orgH)
        confidence = get_confidence(exp_scores, cls_indx)
        results.append((boxes, confidence))
    return results
def nms(dets, thresh):
    """Run GPU non-maximum suppression on ``[x, y, w, h, score]`` rows.

    Args:
        dets: 2-D detection array; the first four columns are
            ``x, y, width, height``.  It is left untouched.
        thresh: threshold forwarded to ``gpu_nms``.

    Returns:
        An empty list when ``dets`` has no rows, otherwise whatever
        ``gpu_nms`` returns for the converted detections.
    """
    if not dets.shape[0]:
        return []
    # gpu_nms is fed corner coordinates, so turn (w, h) into (x2, y2) on a
    # copy of the input.
    corners = np.array(dets, copy=True)
    corners[:, 2:4] += corners[:, 0:2]
    return gpu_nms(corners, thresh, device_id=GPU_ID)
def detect(net, image_path, target_size=(2560, 768), CONF_THRESH=0.7, NMS_THRESH=0.3):
    """Detect objects in one image and return the confident, NMS-filtered hits.

    Args:
        net: network handed on to ``im_detect``.
        image_path: path of the image to process.
        target_size: network input size handed on to ``im_detect``.
        CONF_THRESH: minimum confidence a detection needs to be reported.
        NMS_THRESH: threshold handed on to ``nms``.

    Returns:
        A list of ``(x, y, width, height, class_index, score)`` tuples, where
        ``class_index`` indexes ``CLASSES`` (background excluded).
    """
    per_class = im_detect(net, image_path, target_size)
    results = []
    for offset in range(len(CLASSES) - 1):
        label = offset + 1  # position in CLASSES; 0 is the background
        boxes, scores = per_class[offset]
        dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32)
        # Suppress overlapping boxes first, then apply the confidence cut.
        dets = dets[nms(dets, NMS_THRESH), :]
        confident = dets[dets[:, -1] >= CONF_THRESH]
        results.extend(
            (row[0], row[1], row[2], row[3], label, row[-1])
            for row in confident
        )
    return results
def main():
    """Run the detector on one KITTI sample image and save the result.

    Loads the pretrained MS-CNN pedestrian/cyclist model on the GPU selected
    by ``GPU_ID``, detects objects in a fixed sample image, draws one
    rectangle per detection in its class colour and writes the annotated
    image to ``test.png``.
    """
    prototxt = "examples/kitti_ped_cyc/mscnn-8s-768-trainval-pretrained/mscnn_deploy.prototxt"
    weights = "examples/kitti_ped_cyc/mscnn-8s-768-trainval-pretrained/mscnn_kitti_trainval_2nd_iter_35000.caffemodel"
    # Single definition of the sample path so detection and drawing cannot
    # drift apart.
    image_path = "data/kitti/training/image_2/001068.png"

    caffe.set_mode_gpu()
    caffe.set_device(GPU_ID)
    net = caffe.Net(prototxt, weights, caffe.TEST)

    res = detect(net, image_path)
    img = cv2.imread(image_path)
    for x, y, width, height, cls_indx, _score in res:
        top_left = (int(x), int(y))
        bottom_right = (int(x + width), int(y + height))
        cv2.rectangle(img, top_left, bottom_right, COLOR[cls_indx], 2)
    cv2.imwrite("test.png", img)


# Only run the demo when executed as a script, so importing this file for
# its functions does not start GPU inference.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Traceback (most recent call last):
File "ped-cyc-mscnn-detection.py", line 9, in <module>
from nms.gpu_nms import gpu_nms
ImportError: No module named nms.gpu_nms
The advice I found online says there should be a lib folder where running setup.py or make builds the missing nms module and resolves this import error.
However, that advice only applies to Faster R-CNN; there is no corresponding lib directory for MS-CNN.
How did you solve this?