Skip to content

Instantly share code, notes, and snippets.

@zoecarver
Created November 27, 2018 03:41
Show Gist options
  • Save zoecarver/40da8c861cc5074ccc5c200c34f38239 to your computer and use it in GitHub Desktop.
Save zoecarver/40da8c861cc5074ccc5c200c34f38239 to your computer and use it in GitHub Desktop.
from glob import glob
import os
from random import randint

import cv2
from cv2 import COLOR_RGB2GRAY
import matplotlib.pyplot as plt
import numpy as np
from skimage.feature import hog
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
# MARK - utils
def randcolorvalue():
    """Return one random colour-channel value between 0 and 255 as a float."""
    channel = randint(0, 255)
    return float(channel)
def randcolor():
    """Return a random colour as a tuple of three float channel values."""
    first = randcolorvalue()
    second = randcolorvalue()
    third = randcolorvalue()
    return (first, second, third)
def draw_boxes(image, boxes):
    """Return a copy of `image` with one rectangle outlined per box.

    Each entry of `boxes` is an ``(x_min, x_max, y_min, y_max)`` tuple.
    Every rectangle is drawn with its own random colour and a thickness
    of 2. The drawing happens on a copy, so `image` itself is untouched.
    """
    canvas = np.copy(image)
    for box in boxes:
        outline = randcolor()
        left, right, top, bottom = box
        # cv2.rectangle takes two opposite corners as (x, y) points.
        corner_a = (left, top)
        corner_b = (right, bottom)
        cv2.rectangle(canvas, corner_a, corner_b, outline, 2)
    return canvas
def get_hog_features(image, visualize=False):
    """Run ``skimage.feature.hog`` on `image` with this script's fixed settings.

    Settings: 9 orientations, 8x8-pixel cells, 2x2-cell blocks, L1 block
    normalisation, flattened feature vector.

    The return value is passed through from ``hog`` unchanged. Callers in
    this file index ``[1]`` into it when ``visualize=True``, i.e. in that
    mode they treat it as a pair whose second item is the image to show.
    """
    # NOTE(review): no channel/multichannel argument is passed; confirm how
    # the installed scikit-image version treats 3-channel input.
    hog_options = dict(
        orientations=9,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        visualize=visualize,
        feature_vector=True,
        block_norm='L1',
    )
    return hog(image, **hog_options)
def get_features(images):
    """Return a list holding the HOG result for each image.

    `images` may be a list of images or a single image. A single image is
    wrapped in a list first, so the return value is always a list with one
    entry per image, in input order.
    """
    # isinstance instead of an exact type comparison, so that list
    # subclasses are also treated as a collection of images rather than
    # being wrapped and handed to the HOG helper as if they were one image.
    if not isinstance(images, list):
        images = [images]
    return [get_hog_features(image) for image in images]
def sliding_window(
    image,
    x_start=0,
    x_stop=None,
    y_start=0,
    y_stop=None,
    window=None,
    overlap=None
):
    """Compute the coordinates of overlapping windows covering a region.

    Args:
        image: object exposing ``shape``; only consulted when `x_stop` or
            `y_stop` is None.
        x_start: first x coordinate of the search region.
        x_stop: end of the region along x; defaults to ``image.shape[1]``.
        y_start: first y coordinate of the search region.
        y_stop: end of the region along y; defaults to ``image.shape[0]``.
        window: ``(width, height)`` of each window in pixels (required).
        overlap: ``(x_fraction, y_fraction)`` of overlap between
            neighbouring windows (required).

    Returns:
        A list of ``(x_start, x_end, y_start, y_end)`` tuples, ordered row
        by row (all x positions for the first y position, then the next).
        The list is empty when the region is too small for one window.

    Raises:
        TypeError: if `window` or `overlap` is not supplied.
        ZeroDivisionError: if the overlap leaves a step of zero pixels.
    """
    if window is None or overlap is None:
        raise TypeError(
            "sliding_window() requires both 'window' and 'overlap'"
        )
    if x_stop is None:
        x_stop = image.shape[1]
    if y_stop is None:
        y_stop = image.shape[0]

    x_span = x_stop - x_start
    y_span = y_stop - y_start

    # Builtin int() replaces the np.int alias, which newer NumPy releases
    # no longer provide; both truncate toward zero.
    x_step = int(window[0] * (1 - overlap[0]))
    y_step = int(window[1] * (1 - overlap[1]))
    # The buffer is the part of a window shared with its neighbour; it is
    # subtracted so the last window still fits inside the span.
    x_buffer = int(window[0] * overlap[0])
    y_buffer = int(window[1] * overlap[1])
    x_window_count = int((x_span - x_buffer) / x_step)
    y_window_count = int((y_span - y_buffer) / y_step)

    window_list = []
    for y_index in range(y_window_count):
        top = y_start + y_index * y_step
        for x_index in range(x_window_count):
            left = x_start + x_index * x_step
            window_list.append(
                (left, left + window[0], top, top + window[1])
            )
    return window_list
def predict_windows(image, windows=None, classifier=None):
    """Return the windows for which `classifier` predicts 1.

    Each window is an ``(x_start, x_stop, y_start, y_stop)`` tuple. The
    matching region of `image` is cropped, resized to 64x64, flattened and
    given to ``classifier.predict`` as a single sample. Windows are
    returned in the order they were supplied.
    """
    hits = []
    for candidate in windows:
        left, right, top, bottom = candidate
        patch = cv2.resize(image[top:bottom, left:right], (64, 64))
        # The flattened pixels are classified directly; HOG features are
        # not computed here.
        label = classifier.predict([patch.ravel()])
        if label == 1.:
            hits.append(candidate)
    return hits
# MARK - data processing
def _load_resized_images(filenames):
    """Read each file in `filenames` and return the images resized to 64x64."""
    images = []
    for filename in filenames:
        # NOTE(review): COLOR_RGB2GRAY is a cvtColor conversion code, not one
        # of the IMREAD_* flags that imread's second argument expects, so
        # this may not load grayscale as the name suggests - confirm. It is
        # kept unchanged because the other imread calls in this script use
        # the same value and the classifier input size has to match.
        image = cv2.imread(filename, COLOR_RGB2GRAY)
        if image is None:
            # imread reports an unreadable file by returning None; fail here
            # with the filename instead of inside cv2.resize.
            raise IOError('Could not read image: %s' % filename)
        images.append(cv2.resize(image, (64, 64)))
    return images


people_glob = glob('dataset/people/*.png')
background_glob = glob('dataset/not-people/*.png')
people = _load_resized_images(people_glob)
not_people = _load_resized_images(background_glob)
people_len = len(people)
not_people_len = len(not_people)
print('Number of people: %d' % people_len)
print('Number of background images: %d' % not_people_len)
# show up to 4 of each
_, plots = plt.subplots(4, 2)
# Zipping over slices keeps this from raising IndexError when either set
# holds fewer than four images; the unused axes simply stay empty.
for row, (person, background) in enumerate(zip(people[:4], not_people[:4])):
    plots[row, 0].imshow(person)
    plots[row, 1].imshow(background)
plt.show()
# get features
test_image = people[0]
test_image_background = not_people[0]
_, plots = plt.subplots(2, 2)
# Left column: the input image. Right column: item [1] of the HOG helper's
# result when called with visualize=True.
plots[0, 0].imshow(test_image)
plots[0, 1].imshow(get_hog_features(test_image, visualize=True)[1])
plots[1, 0].imshow(test_image_background)
plots[1, 1].imshow(get_hog_features(test_image_background, visualize=True)[1])
plt.show()
# NOTE: the classifier in this file is fitted on the flattened raw images,
# so these feature lists are not consumed anywhere else in the script.
people_features = get_features(people)
not_people_features = get_features(not_people)
# MARK - train classifier
# One row of flattened pixel values per image: people first, then background.
people_rows = [person.ravel() for person in people]
background_rows = [not_p.ravel() for not_p in not_people]
X = np.vstack([people_rows, background_rows])
# Labels follow the row order of X: 1 for people, 0 for background.
people_labels = np.ones(people_len)
background_labels = np.zeros(not_people_len)
y = np.concatenate([people_labels, background_labels])
# Hold out 20% of the shuffled samples for scoring.
train_x, test_x, train_y, test_y = train_test_split(
    X, y, test_size=0.2, shuffle=True
)
classifier = LinearSVC(verbose=1)
classifier.fit(train_x, train_y)
accuracy = classifier.score(test_x, test_y)  # Test accuracy
print('Accuracy: %s' % accuracy)
# Sanity check on a single image, prepared the same way as the training data.
cropped_test_image = cv2.resize(
    cv2.imread('small_test.png', COLOR_RGB2GRAY),  # test image
    (64, 64),
)
prediction = classifier.predict([cropped_test_image.ravel()])
print('Prediction: %d' % prediction[0])  # prediction should be 1
# create actual object detector
test_image = cv2.imread('test.jpg', COLOR_RGB2GRAY)  # load test image
if test_image is None:
    # imread returns None for a missing/unreadable file; fail with a clear
    # message instead of inside cv2.resize.
    raise IOError('Could not read image: test.jpg')
test_image = cv2.resize(test_image, (200, 400))  # resize
# get all sliding windows we want (three window sizes, larger ones reach
# further down the image)
search_windows = (
    sliding_window(test_image, y_stop=200, window=(64, 64), overlap=(.7, .7)) +
    sliding_window(test_image, y_stop=250, window=(80, 80), overlap=(.6, .6)) +
    sliding_window(test_image, y_stop=300, window=(96, 96), overlap=(.5, .5))
    # sliding_window(test_image, y_stop=350, window=(128, 128), overlap=(.4, .4))
)
# play with values for sliding window
positive_windows = predict_windows(test_image, windows=search_windows, classifier=classifier)
annotated_image = draw_boxes(test_image, positive_windows)  # positive boxes
boxed_image = draw_boxes(test_image, search_windows)  # to visualize the search window coverage of our image
# print this out and stop here for explanation
# Accumulate the heat in a wide integer type: adding 50 per hit directly to
# an 8-bit array wraps around once more than five windows overlap, which
# makes the hottest areas look cold.
heat = np.zeros(test_image.shape, dtype=np.int64)
for x_min, x_max, y_min, y_max in positive_windows:
    heat[y_min:y_max, x_min:x_max] += 50
if np.issubdtype(test_image.dtype, np.integer):
    # Saturate at the largest value the image dtype can hold.
    np.clip(heat, 0, np.iinfo(test_image.dtype).max, out=heat)
heatmap = heat.astype(test_image.dtype)
_, plots = plt.subplots(2, 2)
plots[0, 0].imshow(annotated_image)
plots[1, 0].imshow(heatmap)
plots[1, 1].imshow(boxed_image)
plots[0, 1].imshow(test_image)
plt.show()
# MARK - done, video processing
test_video = cv2.VideoCapture('test.MOV')
# cv2.imwrite reports failure through its return value rather than raising,
# so make sure the target directory exists before writing frames into it.
os.makedirs('output', exist_ok=True)
count = -1
try:
    while True:
        success, test_image = test_video.read()
        if not success:
            break
        count += 1
        # Only every fifth frame is processed.
        if count % 5 != 0:
            continue
        test_image = cv2.resize(test_image, (200, 400))  # resize
        # get all sliding windows we want
        search_windows = (
            sliding_window(test_image, y_stop=200, window=(64, 64), overlap=(.7, .7)) +
            sliding_window(test_image, y_stop=250, window=(80, 80), overlap=(.6, .6)) +
            sliding_window(test_image, y_stop=300, window=(96, 96), overlap=(.5, .5)) +
            sliding_window(test_image, y_stop=350, window=(128, 128), overlap=(.4, .4))
        )
        # play with values for sliding window
        positive_windows = predict_windows(test_image, windows=search_windows, classifier=classifier)
        annotated_image = draw_boxes(test_image, positive_windows)  # positive boxes
        boxed_image = draw_boxes(test_image, search_windows)  # to visualize the search window coverage of our image
        # Accumulate the heat in a wide integer type: adding 50 per hit
        # directly to an 8-bit array wraps around once more than five
        # windows overlap.
        heat = np.zeros(test_image.shape, dtype=np.int64)
        for x_min, x_max, y_min, y_max in positive_windows:
            heat[y_min:y_max, x_min:x_max] += 50
        if np.issubdtype(test_image.dtype, np.integer):
            # Saturate at the largest value the frame dtype can hold.
            np.clip(heat, 0, np.iinfo(test_image.dtype).max, out=heat)
        heatmap = heat.astype(test_image.dtype)
        # 2x2 mosaic: annotated over heatmap on the left, original frame
        # over search-window coverage on the right.
        left_column = np.concatenate((annotated_image, heatmap), axis=0)
        right_column = np.concatenate((test_image, boxed_image), axis=0)
        mosaic = np.concatenate((left_column, right_column), axis=1)
        output_path = 'output/%i_IMG.png' % count
        if not cv2.imwrite(output_path, mosaic):
            raise IOError('Could not write image: %s' % output_path)
finally:
    # Always hand the capture back, even if a frame fails to process.
    test_video.release()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment