Skip to content

Instantly share code, notes, and snippets.

@ChronoMonochrome
Created May 1, 2023 10:29
Show Gist options
  • Save ChronoMonochrome/ff72590a018fe2fd97add80992fb237b to your computer and use it in GitHub Desktop.
Save ChronoMonochrome/ff72590a018fe2fd97add80992fb237b to your computer and use it in GitHub Desktop.
A script to search for a frame in a video using SSIM (structural similarity) image comparison
from time import time
import av
import cv2
import numpy as np
from PIL import Image
from SSIM_PIL import compare_ssim
# Load reference image and get its dimensions
ref_image_path = "preroll_end2_cropped.png"
ref_img = cv2.imread(ref_image_path)
if ref_img is None:
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising, which would otherwise surface as an AttributeError below.
    raise FileNotFoundError("Could not read reference image: " + ref_image_path)
ref_height, ref_width, _ = ref_img.shape

# Initialize variables to keep track of matching frames
first_matching_frame = None

# Search ref_img in the frames between fno_lower and fno_upper.
# checkFrame converts frame numbers to time by dividing by 25, so 60*25
# presumably means "the first 60 seconds at 25 fps" - confirm for other inputs.
fno_lower = 0
fno_upper = 60*25

# Assume the frame doesn't match if the similarity score is below 0.4
similarity_threshold = 0.4

# Open the video file
video_path = "video.mp4"
container = av.open(video_path)
print(container.duration)
def checkFrame(fno, fps=25):
    """
    Decode the video frame at the given frame number and score it against the reference image.

    The region compared is the top-right corner of the frame: the first
    ref_height rows and the last ref_width columns.

    Args:
        fno (int): The frame number to read.
        fps (int | float): Frame rate used to turn fno into a timestamp.
            Defaults to 25, the value that used to be hard-coded here.
    Returns:
        float: The similarity score between the reference image and the extracted
        region of interest from the decoded frame, as computed by compare_ssim.
    Raises:
        IndexError: If no video frame can be decoded at or after the requested position.
    Usage:
        This function is called within findFirstFrame to determine the similarity
        score of each probed frame. Call it with the desired frame number.
    """
    target_seconds = fno / fps
    # Seeking without a stream argument takes the offset in microseconds.
    # Multiply before dividing so sub-second precision is not thrown away.
    container.seek(int(fno * 1000000 / fps))

    # The seek lands on a keyframe at or before the target, so decode forward
    # until the requested timestamp is reached. Half a frame of slack absorbs
    # rounding in the timestamp comparison.
    # NOTE(review): assumes stream timestamps start at 0 - confirm for the input file.
    earliest_time = target_seconds - 0.5 / fps
    frame = None
    for candidate in container.decode(video=0):
        if candidate.time is None or candidate.time >= earliest_time:
            frame = candidate
            break
    if frame is None:
        raise IndexError("frame number %s is out of range" % (fno,))

    frame_array = np.array(frame.to_image())
    # Extract the ROI using numpy indexing
    roi = frame_array[:ref_height, -ref_width:]

    # Convert images to PIL format and calculate similarity score.
    # cv2.imread yields BGR, so the reference needs the channel swap; the frame
    # came from to_image() and is already RGB, so it must NOT be swapped.
    pil_ref_img = Image.fromarray(cv2.cvtColor(ref_img, cv2.COLOR_BGR2RGB))
    pil_roi = Image.fromarray(np.ascontiguousarray(roi))
    return compare_ssim(pil_ref_img, pil_roi)
def findFirstFrame(fno_lower, fno_upper, similarity_threshold):
    """
    Binary-search for the first frame whose similarity score is below similarity_threshold.

    Frames are probed with checkFrame over the half-open range
    [fno_lower, fno_upper); the frame at fno_upper itself is never probed.
    The result is only meaningful if the scores switch from "not below the
    threshold" to "below the threshold" once within that range.

    Args:
        fno_lower (int): Lower bound of the frame numbers to search.
        fno_upper (int): Upper bound (exclusive) of the frame numbers to search.
        similarity_threshold (float): Scores strictly below this value count as a hit.
    Returns:
        int: The first frame number whose score is below similarity_threshold,
        or fno_upper when no probed frame scored below it. When
        fno_lower >= fno_upper nothing is probed and fno_lower is returned.
    """
    lo, hi = fno_lower, fno_upper
    while lo < hi:
        probe = (lo + hi) // 2
        score = checkFrame(probe)
        print("frame number ", probe, " similarity ", score)
        if score < similarity_threshold:
            # The answer is at probe or earlier.
            hi = probe
            continue
        # The answer, if any, lies strictly after probe.
        lo = probe + 1
    return lo
# Time the whole search
t00 = time()
first_matching_frame = findFirstFrame(fno_lower, fno_upper, similarity_threshold)
print("Total time", time() - t00)

# Print first matching frame.
# findFirstFrame returns a frame number, not None; a result at or past
# fno_upper means no probed frame scored below the threshold.
if first_matching_frame is not None and first_matching_frame < fno_upper:
    print("First matching frame:", first_matching_frame)
else:
    print("No matching frames found")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment