rjw57 · February 12, 2014 13:04
diff --git a/stabilise.py b/stabilise.py
 #!/usr/bin/env python
 """
 Usage:
    stabilise.py [options] <video>
    stabilise.py (-h | --help)

 Options:
    -s COUNT, --skip=COUNT      Skip the first COUNT frames. [default: 0]
    -d COUNT, --duration=COUNT  Process COUNT frames.
 """

 # This script is very hacked together and has a large number of vestigial
 # "useless" bits from aborted lines of research. It was written in a hurry but
 # may be useful to others.

 import logging

 from docopt import docopt
 import cv2
 import numpy as np
 from scipy.signal import fftconvolve

 def shift_cols(im, shifts):
     """Return a copy of ``im`` with every column cyclically rolled along axis 0.

     Column ``c`` is rolled by ``-shifts[c]``, so a positive shift moves the
     column's content towards row 0 and wraps the displaced rows round to the
     bottom. ``im`` itself is not modified.

     :param im: array with at least two axes; axis 0 is rolled, axis 1 indexes
         the columns and any further axes are carried along unchanged.
     :param shifts: indexable of integer shifts, one entry per column of ``im``.
     :returns: a new array with the same shape as ``im``.
     """
     out = im.copy()
     # ``range`` rather than ``xrange`` so that this runs on Python 2 and 3.
     for col_idx in range(out.shape[1]):
         out[:, col_idx, ...] = np.roll(out[:, col_idx, ...], -shifts[col_idx], axis=0)
     return out

 def align(frame, template):
     """Roll ``frame`` horizontally so that it best matches ``template``.

     Both images are converted to greyscale with ``cv2.COLOR_BGR2GRAY``,
     multiplied by a 2-D Hamming window and cross-correlated (an FFT
     convolution of the template with the flipped frame). The correlation is
     normalised by the window's own autocorrelation, and offsets within half a
     frame of the border of the correlation image are suppressed. Only the
     horizontal component of the peak is applied; the vertical component is
     deliberately fixed at zero.

     :param frame: image to be shifted; passed to ``cv2.cvtColor`` as BGR.
     :param template: reference image with the same shape as ``frame``.
     :returns: ``frame`` cyclically rolled along axis 1 by the computed offset.
     :raises ValueError: if ``frame`` and ``template`` differ in shape.
     """
     if frame.shape != template.shape:
         raise ValueError('Template must be same shape as frame')

     frame_l = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
     template_l = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

     rows, cols = template.shape[0], template.shape[1]

     # Calculate window to use in FFT convolve
     w = np.outer(np.hamming(rows), np.hamming(cols))

     # Calculate a normalisation for the cross-correlation
     ccnorm = 1.0 / fftconvolve(w, w)

     # Set border of normalisation to zero to avoid overfitting. Border is set
     # so that there must be a minimum of half-frame overlap. Explicit end
     # offsets are used instead of negative slices because ``-0:`` would
     # select (and zero) the whole array when a dimension is under 2 pixels.
     border_r, border_c = rows >> 1, cols >> 1
     ccnorm[:border_r, :] = 0
     ccnorm[ccnorm.shape[0] - border_r:, :] = 0
     ccnorm[:, :border_c] = 0
     ccnorm[:, ccnorm.shape[1] - border_c:] = 0

     # Convolving with the frame flipped on both axes is a cross-correlation
     conv_im = fftconvolve(template_l * w, (frame_l * w)[::-1, ::-1])
     conv_im *= ccnorm

     # Find maximum location
     max_loc = np.unravel_index(conv_im.argmax(), conv_im.shape)

     # Convert location to shift. The vertical shift is intentionally disabled;
     # it would have been max_loc[0] - template.shape[0] + 1.
     dy = 0
     dx = max_loc[1] - cols + 1
     logging.info('Offset computed to be ({0},{1})'.format(dx, dy))

     # Warp image
     return np.roll(frame, dx, axis=1)

 def main():
     """Command-line entry point: stabilise a video into numbered PNG frames.

     Options are parsed from the module docstring with docopt. After skipping
     ``--skip`` frames, every frame read from ``<video>`` is processed thus:

     1. a hue-threshold mask is built and rolled vertically so that the row
        with the smallest mask sum comes first;
     2. the per-column row of maximum distance-transform value is taken and a
        straight line (the 'horizon') is fitted through those rows;
     3. each column is rolled vertically according to the fitted line and the
        whole frame is rolled horizontally by half its width;
     4. the result is registered horizontally against the previously written
        frame with ``align``.

     Each corrected frame is written to ``frame-NNNNN.png`` (relative path).
     Processing stops at the end of the video or once ``--duration`` frames
     have been written.
     """
     options = docopt(__doc__)
     logging.basicConfig(level=logging.INFO)

     # Parse the numeric options once, up front, so a malformed value fails
     # before any frame has been written.
     skip = int(options['--skip'])
     duration = options['--duration']
     if duration is not None:
         duration = int(duration)

     # OpenCV 3+ removed the ``cv2.cv`` namespace; prefer the modern constant
     # and fall back to the legacy one on OpenCV 2.
     dist_l2 = cv2.DIST_L2 if hasattr(cv2, 'DIST_L2') else cv2.cv.CV_DIST_L2

     apron = 50    # columns ignored at each side when fitting the horizon
     t_apron = 1   # number of recent horizon fits combined by the median
     x_shift = 0   # extra horizontal offset; currently disabled

     prev_shifts = []
     prev_frame = None
     idx = 0

     vc = cv2.VideoCapture(options['<video>'])
     try:
         for _ in range(skip):
             vc.grab()

         while True:
             ok, frame = vc.read()
             if not ok:
                 break

             height, width = frame.shape[0], frame.shape[1]

             frame_hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
             hue = frame_hsv[:, :, 0]

             # Mask is 255 where the hue lies strictly between 40 and 120 and
             # 0 elsewhere (the original author's note labels the zero region
             # as sky and the rest as land).
             sky_mask = np.uint8(np.where(np.logical_and(hue > 40, hue < 120), 255, 0))

             # Try to find vertical position with as much sky as possible in
             pre_shift = np.argmin(np.sum(sky_mask, axis=1))
             sky_pad_mask = np.roll(sky_mask, -pre_shift, axis=0)

             sky_dist = cv2.distanceTransform(sky_pad_mask, dist_l2, 3)

             v_shifts = np.int32(np.unwrap(np.argmax(sky_dist, axis=0), height >> 1))

             # Fit a 'horizon': a straight line through the per-column rows,
             # ignoring ``apron`` columns at either side.
             xs = np.arange(width)
             horizon = np.polyfit(xs[apron:-apron], v_shifts[apron:-apron], 1)
             v_shifts_smoothed = np.array(np.polyval(horizon, xs), v_shifts.dtype)

             # Median over the last ``t_apron`` fits
             prev_shifts.append(v_shifts_smoothed)
             if len(prev_shifts) > t_apron:
                 prev_shifts = prev_shifts[-t_apron:]
             v_shifts_temporal_smoothed = np.array(np.median(prev_shifts, axis=0), v_shifts.dtype)

             # Parenthesised on purpose: ``a >> 1 - b`` parses as
             # ``a >> (1 - b)``, which is only correct while x_shift is 0.
             corr_frame = np.roll(
                 shift_cols(frame, v_shifts_temporal_smoothed + (height >> 1) + pre_shift),
                 (width >> 1) - x_shift, axis=1)

             if prev_frame is not None:
                 corr_frame = align(corr_frame, prev_frame)
             prev_frame = corr_frame

             logging.info('Writing frame {0}'.format(idx))
             cv2.imwrite('frame-{0:05d}.png'.format(idx), corr_frame)
             idx += 1

             if duration is not None and duration <= idx:
                 break
     finally:
         # Always give the capture handle back, even if processing fails
         vc.release()

 if __name__ == "__main__":
     # Run the command-line entry point only when executed as a script,
     # not when the module is imported.
     main()
	#!/usr/bin/env python
	"""
	Usage:
	    stabilise.py [options] <video>
	    stabilise.py (-h | --help)

	Options:
	    -s COUNT, --skip=COUNT      Skip the first COUNT frames. [default: 0]
	    -d COUNT, --duration=COUNT  Process COUNT frames.
	"""

	# This script is very hacked together and has a large number of vestigial
	# "useless" bits from aborted lines of research. It was written in a hurry but
	# may be useful to others.

	import logging

	from docopt import docopt
	import cv2
	import numpy as np
	from scipy.signal import fftconvolve

	def shift_cols(im, shifts):
	    """Return a copy of ``im`` with every column cyclically rolled along axis 0.

	    Column ``c`` is rolled by ``-shifts[c]``, so a positive shift moves the
	    column's content towards row 0 and wraps the displaced rows round to the
	    bottom. ``im`` itself is not modified.

	    :param im: array with at least two axes; axis 0 is rolled, axis 1 indexes
	        the columns and any further axes are carried along unchanged.
	    :param shifts: indexable of integer shifts, one entry per column of ``im``.
	    :returns: a new array with the same shape as ``im``.
	    """
	    out = im.copy()
	    # ``range`` rather than ``xrange`` so that this runs on Python 2 and 3.
	    for col_idx in range(out.shape[1]):
	        out[:, col_idx, ...] = np.roll(out[:, col_idx, ...], -shifts[col_idx], axis=0)
	    return out

	def align(frame, template):
	    """Roll ``frame`` horizontally so that it best matches ``template``.

	    Both images are converted to greyscale with ``cv2.COLOR_BGR2GRAY``,
	    multiplied by a 2-D Hamming window and cross-correlated (an FFT
	    convolution of the template with the flipped frame). The correlation is
	    normalised by the window's own autocorrelation, and offsets within half a
	    frame of the border of the correlation image are suppressed. Only the
	    horizontal component of the peak is applied; the vertical component is
	    deliberately fixed at zero.

	    :param frame: image to be shifted; passed to ``cv2.cvtColor`` as BGR.
	    :param template: reference image with the same shape as ``frame``.
	    :returns: ``frame`` cyclically rolled along axis 1 by the computed offset.
	    :raises ValueError: if ``frame`` and ``template`` differ in shape.
	    """
	    if frame.shape != template.shape:
	        raise ValueError('Template must be same shape as frame')

	    frame_l = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	    template_l = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)

	    rows, cols = template.shape[0], template.shape[1]

	    # Calculate window to use in FFT convolve
	    w = np.outer(np.hamming(rows), np.hamming(cols))

	    # Calculate a normalisation for the cross-correlation
	    ccnorm = 1.0 / fftconvolve(w, w)

	    # Set border of normalisation to zero to avoid overfitting. Border is set
	    # so that there must be a minimum of half-frame overlap. Explicit end
	    # offsets are used instead of negative slices because ``-0:`` would
	    # select (and zero) the whole array when a dimension is under 2 pixels.
	    border_r, border_c = rows >> 1, cols >> 1
	    ccnorm[:border_r, :] = 0
	    ccnorm[ccnorm.shape[0] - border_r:, :] = 0
	    ccnorm[:, :border_c] = 0
	    ccnorm[:, ccnorm.shape[1] - border_c:] = 0

	    # Convolving with the frame flipped on both axes is a cross-correlation.
	    # Both greyscale images are multiplied by the window first.
	    conv_im = fftconvolve(template_l * w, (frame_l * w)[::-1, ::-1])
	    conv_im *= ccnorm

	    # Find maximum location
	    max_loc = np.unravel_index(conv_im.argmax(), conv_im.shape)

	    # Convert location to shift. The vertical shift is intentionally disabled;
	    # it would have been max_loc[0] - template.shape[0] + 1.
	    dy = 0
	    dx = max_loc[1] - cols + 1
	    logging.info('Offset computed to be ({0},{1})'.format(dx, dy))

	    # Warp image
	    return np.roll(frame, dx, axis=1)

	def main():
	    """Command-line entry point: stabilise a video into numbered PNG frames.

	    Options are parsed from the module docstring with docopt. After skipping
	    ``--skip`` frames, every frame read from ``<video>`` is processed thus:

	    1. a hue-threshold mask is built and rolled vertically so that the row
	       with the smallest mask sum comes first;
	    2. the per-column row of maximum distance-transform value is taken and a
	       straight line (the 'horizon') is fitted through those rows;
	    3. each column is rolled vertically according to the fitted line and the
	       whole frame is rolled horizontally by half its width;
	    4. the result is registered horizontally against the previously written
	       frame with ``align``.

	    Each corrected frame is written to ``frame-NNNNN.png`` (relative path).
	    Processing stops at the end of the video or once ``--duration`` frames
	    have been written.
	    """
	    options = docopt(__doc__)
	    logging.basicConfig(level=logging.INFO)

	    # Parse the numeric options once, up front, so a malformed value fails
	    # before any frame has been written.
	    skip = int(options['--skip'])
	    duration = options['--duration']
	    if duration is not None:
	        duration = int(duration)

	    # OpenCV 3+ removed the ``cv2.cv`` namespace; prefer the modern constant
	    # and fall back to the legacy one on OpenCV 2.
	    dist_l2 = cv2.DIST_L2 if hasattr(cv2, 'DIST_L2') else cv2.cv.CV_DIST_L2

	    apron = 50    # columns ignored at each side when fitting the horizon
	    t_apron = 1   # number of recent horizon fits combined by the median
	    x_shift = 0   # extra horizontal offset; currently disabled

	    prev_shifts = []
	    prev_frame = None
	    idx = 0

	    vc = cv2.VideoCapture(options['<video>'])
	    try:
	        for _ in range(skip):
	            vc.grab()

	        while True:
	            ok, frame = vc.read()
	            if not ok:
	                break

	            height, width = frame.shape[0], frame.shape[1]

	            frame_hsv = cv2.cvtColor(frame, cv2.COLOR_RGB2HSV)
	            hue = frame_hsv[:, :, 0]

	            # Mask is 255 where the hue lies strictly between 40 and 120 and
	            # 0 elsewhere (the original author's note labels the zero region
	            # as sky and the rest as land).
	            sky_mask = np.uint8(np.where(np.logical_and(hue > 40, hue < 120), 255, 0))

	            # Try to find vertical position with as much sky as possible in
	            pre_shift = np.argmin(np.sum(sky_mask, axis=1))
	            sky_pad_mask = np.roll(sky_mask, -pre_shift, axis=0)

	            sky_dist = cv2.distanceTransform(sky_pad_mask, dist_l2, 3)

	            v_shifts = np.int32(np.unwrap(np.argmax(sky_dist, axis=0), height >> 1))

	            # Fit a 'horizon': a straight line through the per-column rows,
	            # ignoring ``apron`` columns at either side.
	            xs = np.arange(width)
	            horizon = np.polyfit(xs[apron:-apron], v_shifts[apron:-apron], 1)
	            v_shifts_smoothed = np.array(np.polyval(horizon, xs), v_shifts.dtype)

	            # Median over the last ``t_apron`` fits
	            prev_shifts.append(v_shifts_smoothed)
	            if len(prev_shifts) > t_apron:
	                prev_shifts = prev_shifts[-t_apron:]
	            v_shifts_temporal_smoothed = np.array(np.median(prev_shifts, axis=0), v_shifts.dtype)

	            # Parenthesised on purpose: ``a >> 1 - b`` parses as
	            # ``a >> (1 - b)``, which is only correct while x_shift is 0.
	            corr_frame = np.roll(
	                shift_cols(frame, v_shifts_temporal_smoothed + (height >> 1) + pre_shift),
	                (width >> 1) - x_shift, axis=1)

	            if prev_frame is not None:
	                corr_frame = align(corr_frame, prev_frame)
	            prev_frame = corr_frame

	            logging.info('Writing frame {0}'.format(idx))
	            cv2.imwrite('frame-{0:05d}.png'.format(idx), corr_frame)
	            idx += 1

	            if duration is not None and duration <= idx:
	                break
	    finally:
	        # Always give the capture handle back, even if processing fails
	        vc.release()

	if __name__ == '__main__':
	    # Run the command-line entry point only when executed as a script,
	    # not when the module is imported.
	    main()