Created
February 11, 2021 21:45
-
-
Save eliaperantoni/05769fcf06c318e070f63d2805e39056 to your computer and use it in GitHub Desktop.
Video Stabilization
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function spectrum = amplitude(image)
%AMPLITUDE Log-scaled, centred amplitude spectrum of an image.
%   SPECTRUM = AMPLITUDE(IMAGE) takes the 2-D FFT of IMAGE, moves the
%   zero-frequency component to the centre with fftshift, takes the
%   magnitude and compresses it with log10(1 + x).
%
%   A 3-dimensional IMAGE is converted with rgb2gray first; any other
%   input is transformed as given.

    % Work on a single channel: 3-D arrays go through rgb2gray.
    if ndims(image) == 3
        image = rgb2gray(image);
    end

    % The +1 keeps log10 finite where the magnitude is zero.
    spectrum = log10(1 + abs(fftshift(fft2(image))));
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function [pad] = buildpad(image)
%BUILDPAD Padding needed to rotate an image by any angle without cropping.
%   PAD = BUILDPAD(IMAGE) returns a 1-by-2 vector [PAD_ROWS PAD_COLS]: the
%   number of rows to add above and below, and the number of columns to add
%   left and right, so that both padded dimensions are at least as long as
%   the diagonal of IMAGE.
%
%   Only the first two dimensions of IMAGE are used, so the result is the
%   same for grayscale and multi-channel inputs.

    m = size(image, 1);
    n = size(image, 2);

    % Diagonal length: the largest extent the image can reach in any
    % direction when rotated about its centre.
    d = sqrt(m*m + n*n);

    % Round up, not to nearest: rounding down would leave the padded canvas
    % up to one pixel shorter than the diagonal and clip the corners.
    pad = [ceil((d - m) / 2) ceil((d - n) / 2)];
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function offset = findoffset(image, template)
%FINDOFFSET Locate a template inside an image by normalized cross-correlation.
%   OFFSET = FINDOFFSET(IMAGE, TEMPLATE) returns [DY DX], the translation
%   that moves TEMPLATE from the top-left corner of IMAGE to the position
%   where normxcorr2 reports the highest correlation.
%
%   3-dimensional inputs are converted with rgb2gray before matching.

    if ndims(image) == 3
        image = rgb2gray(image);
    end
    if ndims(template) == 3
        template = rgb2gray(template);
    end

    % Normalized cross-correlation coefficients for every placement.
    scores = normxcorr2(template, image);

    % Row/column of the strongest response. In the correlation map this
    % corresponds to the bottom-right corner of the matched template.
    [~, peak_index] = max(scores(:));
    [peak_row, peak_col] = ind2sub(size(scores), peak_index);

    % Subtracting the template size moves the reference from the
    % bottom-right corner to the top-left one, giving the translation to
    % apply to a template anchored at the image's top-left corner.
    [t_rows, t_cols, ~] = size(template);
    offset = [peak_row - t_rows, peak_col - t_cols];
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
% Video stabilization driver.
%
% The user selects a template region on the first frame. For every later
% frame the script:
%   1. estimates a small rotation by comparing amplitude spectra,
%   2. undoes that rotation,
%   3. finds the template with normalized cross-correlation and translates
%      the frame so the template lands where it was in the first frame.
% Each output frame is the original and the stabilized frame side by side.

close all
clear all
clc

% If set to `true` enables the writing of the resulting video to a file
WRITE = true;

% Rotation search: every angle in -ANGLE_MAX:ANGLE_STEP:ANGLE_MAX is tried.
% The values are passed to imrotate, which takes its angle in degrees.
ANGLE_MAX = 0.3;
ANGLE_STEP = 0.1;

reader = VideoReader('samples_farm/input/22.mp4');
frames_n = reader.NumFrames;
fprintf('PROCESSING %d FRAMES\n', frames_n);

first_frame = readFrame(reader);
[m, n, ~] = size(first_frame);

% Used to pad any frame to such a size that allows rotation without any
% cropping
pad = buildpad(first_frame);

figure;
[template, template_rect] = imcrop(first_frame);
% NOTE(review): imcrop is expected to return empty outputs when the
% selection is cancelled; stop here instead of failing later in normxcorr2.
if isempty(template)
    error('stabilize:noTemplate', 'No template region was selected.');
end
% `template_rect` is a 4-vector like [x y width height]
template_rect = round(template_rect);

offset_initial = findoffset(padarray(first_frame, pad), template);

% Grayscale videos yield a 2-D template, which rgb2gray would reject.
if ndims(template) == 3
    template_gray = rgb2gray(template);
else
    template_gray = template;
end
[t_rows, t_cols] = size(template_gray);

% `template_padded` will be an image with the same size as the first frame
% but will be all black except for the region enclosing the selected
% template. The start indices are clamped so that rounding the rectangle
% can never make the assignment grow the array past m-by-n, which would
% break the size match corr2 needs.
row0 = min(max(template_rect(2), 1), m - t_rows + 1);
col0 = min(max(template_rect(1), 1), n - t_cols + 1);
template_padded = zeros(m, n, 'uint8');
template_padded(row0:row0 + t_rows - 1, col0:col0 + t_cols - 1) = template_gray;

template_fft = amplitude(template_padded);

% Read every frame up front so the parfor loop can slice the cell array.
frames_in = cell(1, frames_n);
frames_in{1} = first_frame;
for i = 2:frames_n
    frames_in{i} = readFrame(reader);
end

frames_out = cell(1, frames_n);
frames_out{1} = [first_frame first_frame];

parfor i = 2:frames_n
    original = frames_in{i};

    % Pick the candidate angle whose rotated spectrum correlates best with
    % the template's spectrum.
    frame_fft = amplitude(original);
    best_theta = 0;
    best_score = -Inf;
    for theta = -ANGLE_MAX:ANGLE_STEP:ANGLE_MAX
        score = corr2(template_fft, imrotate(frame_fft, theta, 'bilinear', 'crop'));
        if score > best_score
            best_theta = theta;
            best_score = score;
        end
    end
    fprintf('BEST THETA %.1f\n', best_theta);

    % Rotate on the padded canvas so no image content is cut off.
    frame = padarray(original, pad);
    frame = imrotate(frame, best_theta, 'bilinear', 'crop');

    offset = findoffset(frame, template);

    % findoffset's return value is [dy dx] (as is normxcorr2) but imtranslate
    % wants a [dx dy] matrix, that's why we need to flip.
    shift = flip(offset_initial - offset);
    frame = imtranslate(frame, shift, 'FillValues', 0);

    % Revert the padding
    frame = frame(pad(1)+1:pad(1)+m, pad(2)+1:pad(2)+n, :);

    frames_out{i} = [original frame];
    fprintf('COMPLETED FRAME %d\n', i);
end

if WRITE
    fprintf('WRITING\n');
    writer = VideoWriter('stabilized_video');
    % Keep the source frame rate; otherwise the writer's default is used
    % and the output can play at a different speed than the input.
    writer.FrameRate = reader.FrameRate;
    open(writer);
    for i = 1:frames_n
        writeVideo(writer, frames_out{i});
    end
    close(writer);
end

fprintf('DONE\n');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment