Created
February 12, 2014 13:04
-
-
Save rjw57/8955209 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Usage:
    stabilise.py [options] <video>
    stabilise.py (-h | --help)

Options:
    -s COUNT, --skip=COUNT          Skip the first COUNT frames. [default: 0]
    -d COUNT, --duration=COUNT      Process COUNT frames.
"""
# This script is very hacked together and has a large number of vestigial
# "useless" bits from aborted lines of research. It was written in a hurry but
# may be useful to others.
import logging | |
from docopt import docopt | |
import cv2 | |
import numpy as np | |
from scipy.signal import fftconvolve | |
def shift_cols(im, shifts):
    """Return a copy of *im* with every column cyclically shifted along axis 0.

    Column ``c`` of the result equals
    ``np.roll(im[:, c, ...], -shifts[c], axis=0)``: a positive shift moves the
    column's content towards row 0 and wraps it around at the edge. Any
    trailing axes (e.g. colour channels) travel together with their pixel.

    im -- array with at least two dimensions (rows, columns, ...).
    shifts -- integer shifts indexable by column; must hold at least
        ``im.shape[1]`` entries.

    The input array is not modified.
    """
    out = im.copy()
    # range() instead of the Python 2-only xrange() so the function runs
    # unchanged on both Python 2 and Python 3.
    for col_idx in range(out.shape[1]):
        out[:, col_idx, ...] = np.roll(out[:, col_idx, ...], -shifts[col_idx], axis=0)
    return out
def align(frame, template):
    """Horizontally register *frame* against *template*.

    Both images are converted to greyscale with ``cv2.COLOR_BGR2GRAY``,
    multiplied by a 2-D Hamming window and cross-correlated via FFT. The
    column of the correlation peak gives a horizontal offset ``dx``; the frame
    is returned cyclically rolled by ``dx`` along axis 1. The vertical offset
    is deliberately not applied (it is fixed at 0).

    frame -- image to be aligned.
    template -- reference image; must have exactly the same shape as *frame*.

    Returns a rolled copy of *frame*.
    Raises ValueError if the two shapes differ.
    """
    if frame.shape != template.shape:
        raise ValueError('Template must be same shape as frame')

    frame_l = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    template_l = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

    height, width = template.shape[0], template.shape[1]

    # Window applied to both images before the FFT convolution.
    w = np.outer(np.hamming(height), np.hamming(width))

    # Normalisation for the cross-correlation: the reciprocal of the window's
    # own full convolution, which compensates for the varying window overlap
    # at each lag.
    ccnorm = 1.0 / fftconvolve(w, w)

    # Set the border of the normalisation to zero to avoid overfitting. The
    # border is sized so that there must be a minimum of half-frame overlap.
    half_h, half_w = height >> 1, width >> 1
    ccnorm[:half_h, :] = 0
    ccnorm[-half_h:, :] = 0
    ccnorm[:, :half_w] = 0
    ccnorm[:, -half_w:] = 0

    # Convolving with the doubly-flipped frame is a cross-correlation.
    conv_im = fftconvolve(template_l * w, np.fliplr(np.flipud(frame_l * w)))
    conv_im *= ccnorm

    # Location of the correlation peak in the full-size output.
    max_loc = np.unravel_index(conv_im.argmax(), conv_im.shape)

    # Convert the peak location to a shift. Zero lag sits at index
    # (size - 1) of the full convolution. Only the horizontal component is
    # used; dy is kept solely so the log message format stays unchanged.
    dy = 0
    dx = max_loc[1] - width + 1
    logging.info('Offset computed to be ({0},{1})'.format(dx, dy))

    # Warp image (cyclic horizontal shift).
    return np.roll(frame, dx, axis=1)
def main():
    """Stabilise a video and write each corrected frame as a PNG.

    Command-line options are parsed by docopt from the module docstring.
    After skipping ``--skip`` frames, every frame read from ``<video>`` is:

    1. thresholded on its hue channel to build a sky/land mask,
    2. shifted column-by-column (via ``shift_cols``) according to a straight
       line fitted to the per-column mask distance maxima,
    3. horizontally registered against the previously written frame (via
       ``align``),
    4. written to ``frame-NNNNN.png`` in the current directory.

    Processing stops when the video ends or ``--duration`` frames have been
    written (at least one frame is always written if one can be read).
    """
    options = docopt(__doc__)
    logging.basicConfig(level=logging.INFO)

    # Parse numeric options once, up front, instead of on every iteration.
    skip = int(options['--skip'])
    duration = options['--duration']
    if duration is not None:
        duration = int(duration)

    # OpenCV >= 3 exposes the constant as cv2.DIST_L2; the legacy cv2.cv
    # module used by OpenCV 2.4 no longer exists there. Support both.
    if hasattr(cv2, 'DIST_L2'):
        dist_l2 = cv2.DIST_L2
    else:
        dist_l2 = cv2.cv.CV_DIST_L2

    vc = cv2.VideoCapture(options['<video>'])
    try:
        for _ in range(skip):
            vc.grab()

        prev_shifts = []
        prev_frame = None
        idx = 0

        while True:
            ok, frame = vc.read()
            if not ok:
                break

            # NOTE(review): align() treats frames as BGR, yet the RGB
            # conversion code is used here. The hue thresholds below were
            # presumably tuned against this conversion, so it is left
            # untouched -- confirm before changing.
            frame_hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)

            # Mask is 255 where the hue lies strictly between 40 and 120 and
            # 0 elsewhere (the original author labels the zero region "sky"
            # and the rest "land").
            hue = frame_hsv[:, :, 0]
            sky_mask = np.uint8(np.where(np.logical_and(hue > 40, hue < 120), 255, 0))

            # Try to find the vertical position with as much sky as possible
            # in it (row with the smallest mask sum) and roll it to the top.
            pre_shift = np.argmin(np.sum(sky_mask, axis=1))
            sky_pad_mask = np.roll(sky_mask, -pre_shift, axis=0)

            # Per column, pick the row that is farthest from any zero pixel.
            sky_dist = cv2.distanceTransform(sky_pad_mask, dist_l2, 3)
            # NOTE(review): the second positional argument of np.unwrap is
            # `discont`, not the period -- confirm this is what was intended.
            v_shifts = np.int32(np.unwrap(np.argmax(sky_dist, axis=0), frame.shape[0] >> 1))

            # Fit a 'horizon': a straight line through the per-column shifts,
            # ignoring `apron` columns at either edge of the frame.
            xs = np.arange(frame.shape[1])
            apron = 50
            horizon = np.polyfit(xs[apron:-apron], v_shifts[apron:-apron], 1)
            v_shifts_smoothed = np.array(np.polyval(horizon, xs), v_shifts.dtype)

            # Temporal median over the last t_apron fitted horizons (with
            # t_apron == 1 this is just the current one).
            prev_shifts.append(v_shifts_smoothed)
            t_apron = 1
            if len(prev_shifts) > t_apron:
                prev_shifts = prev_shifts[-t_apron:]
            v_shifts_temporal_smoothed = np.array(np.median(prev_shifts, axis=0), v_shifts.dtype)

            # The horizontal pre-shift is disabled. The expression is
            # parenthesised because `a >> 1 - b` parses as `a >> (1 - b)`,
            # which only gave the intended value while x_shift was 0.
            x_shift = 0
            corr_frame = np.roll(
                shift_cols(frame,
                           v_shifts_temporal_smoothed + (frame.shape[0] >> 1) + pre_shift),
                (frame.shape[1] >> 1) - x_shift, axis=1)

            if prev_frame is not None:
                corr_frame = align(corr_frame, prev_frame)
            prev_frame = corr_frame

            logging.info('Writing frame {0}'.format(idx))
            cv2.imwrite('frame-{0:05d}.png'.format(idx), corr_frame)

            idx += 1
            if duration is not None and duration <= idx:
                break
    finally:
        # Always hand the capture device/file back to OpenCV.
        vc.release()
# Run the stabiliser only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment