Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save theSoberSobber/cfea7c0db2d9ab8932e49889ad712fe5 to your computer and use it in GitHub Desktop.
Save theSoberSobber/cfea7c0db2d9ab8932e49889ad712fe5 to your computer and use it in GitHub Desktop.
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import shutil
debug = False  # when true, extract_key_frames also writes every committed key frame into output_folder as a JPEG
# Build the UI with ShadCN for fast iteration; still need to learn how to do streaming UIs, though.
# Prompt the user to decide the thresholds by showing two images and asking whether they are similar; that way they get a good product too!
# Stream logs to the frontend.
# Also add a tqdm progress bar: since we iterate in reverse anyway, we have a progress percentage that can be streamed to the frontend.
def compute_ssim(img1, img2):
    """Return the mean structural similarity (SSIM) index of two images.

    Both inputs are converted to float64 and filtered with an 11x11
    Gaussian window (sigma 1.5). Five rows and columns are cropped from
    every edge of each filtered map before the per-pixel SSIM values are
    averaged into a single score.
    """
    first = img1.astype(np.float64)
    second = img2.astype(np.float64)

    # Stabilising constants of the SSIM formula (presumably assuming an
    # 8-bit intensity range, hence the 255 factor).
    stab_mean = (0.01 * 255)**2
    stab_var = (0.03 * 255)**2

    gauss_1d = cv2.getGaussianKernel(11, 1.5)
    gauss_2d = np.outer(gauss_1d, gauss_1d.transpose())

    def smooth(image):
        # Gaussian-filter the image, then drop a 5-pixel border on each side.
        return cv2.filter2D(image, -1, gauss_2d)[5:-5, 5:-5]

    mean_a = smooth(first)
    mean_b = smooth(second)
    mean_a_sq = mean_a**2
    mean_b_sq = mean_b**2
    mean_ab = mean_a * mean_b

    # Local variances and covariance: E[x*y] - E[x]*E[y].
    var_a = smooth(first**2) - mean_a_sq
    var_b = smooth(second**2) - mean_b_sq
    cov_ab = smooth(first * second) - mean_ab

    numerator = (2 * mean_ab + stab_mean) * (2 * cov_ab + stab_var)
    denominator = (mean_a_sq + mean_b_sq + stab_mean) * (var_a + var_b + stab_var)
    return (numerator / denominator).mean()
def extract_key_frames(video_path, output_folder, similarity_threshold=0.6, time_interval=1):
    """Extract key frames from the video.

    The video is sampled once every ``int(fps * time_interval)`` frames
    (at least every frame). The sampled frames are then scanned from the
    last one to the first. The last sampled frame is always committed as a
    key frame and becomes the reference. A later-visited (earlier in time)
    frame whose SSIM against the reference is below ``similarity_threshold``
    is committed as a new key frame, unless the SSIM against the reference
    falls abruptly somewhere in the following sliding window. In that case
    the reference is moved to the frame where the fall happened and nothing
    is committed for the current frame.

    Args:
        video_path: Path passed to ``cv2.VideoCapture``.
        output_folder: Directory that receives a JPEG per committed frame;
            only used when the module-level ``debug`` flag is true.
        similarity_threshold: SSIM value below which a frame counts as
            different from the current reference frame.
        time_interval: Seconds between two sampled frames.

    Returns:
        A list of ``(frame, frame_index, timestamp)`` tuples in
        chronological order. ``frame_index`` is the position in the sampled
        sequence and ``timestamp`` is ``source_frame_number / fps``.
        The list is empty when no frame could be read.
    """
    video = cv2.VideoCapture(video_path)
    frames = []
    try:
        fps = video.get(cv2.CAP_PROP_FPS)
        # int() truncates to 0 for short intervals (or an unreported fps),
        # which would make the modulo below divide by zero; fall back to
        # sampling every frame in that case.
        frame_interval = max(1, int(fps * time_interval))
        frame_count = 0
        while True:
            ret, frame = video.read()
            if not ret:
                break
            if frame_count % frame_interval == 0:
                gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                # Without a usable fps no timestamp can be derived; use 0.0.
                timestamp = frame_count / fps if fps > 0 else 0.0
                frames.append((frame, gray_frame, timestamp))
            frame_count += 1
    finally:
        # Always give the capture handle back, even if decoding fails.
        video.release()

    if debug:
        # cv2.imwrite does not create missing directories.
        os.makedirs(output_folder, exist_ok=True)

    sliding_window_length = 20
    # An SSIM drop of fall_threshold / normalizer_fall_threshold (0.3 here)
    # between two consecutive window frames counts as an abrupt fall;
    # lowering fall_threshold makes skips more probable.
    fall_threshold = 30
    normalizer_fall_threshold = 100
    max_fall = fall_threshold / normalizer_fall_threshold

    prev_gray_frame = None
    prev_frame_index = None
    key_frames = []
    for frame_index in range(len(frames) - 1, -1, -1):
        current_frame, current_gray_frame, current_timestamp = frames[frame_index]

        if prev_gray_frame is None:
            # The last sampled frame is always a key frame and becomes the
            # first reference; there is nothing to compare it against.
            key_frames.append((current_frame, frame_index, current_timestamp))
            if debug:
                cv2.imwrite(os.path.join(output_folder, f"_{frame_index}.jpg"), current_frame)
            print(f"Commiting new frame {frame_index} that had DUMMY similarity with prev_frame {prev_frame_index} and had no abrupt fall in SSIM in the next 20 in sliding window")
            prev_gray_frame = current_gray_frame
            prev_frame_index = frame_index
            continue

        similarity = compute_ssim(current_gray_frame, prev_gray_frame)
        # There is a chance that this is a key frame, unless it is a hand
        # (a temporary disturbance), which the sliding window checks for.
        if similarity < similarity_threshold:
            future_ssims = []
            jumped_disturbance = False
            # NOTE(review): the stop value is exclusive, so sampled frame 0
            # is never part of a window; confirm whether that is intended.
            window_stop = max(frame_index - sliding_window_length, 0)
            for future_frame_index in range(frame_index, window_stop, -1):
                gray_future_frame = frames[future_frame_index][1]
                window_ssim = compute_ssim(prev_gray_frame, gray_future_frame)
                if future_ssims and future_ssims[-1] - window_ssim >= max_fall:
                    # Abrupt fall: move the reference to this frame.
                    # NOTE: only the reference moves; the outer loop still
                    # visits every sampled frame in between.
                    prev_gray_frame = gray_future_frame
                    prev_frame_index = future_frame_index
                    jumped_disturbance = True
                    print(f"Jumped Disturbance from {future_frame_index + 1} to {future_frame_index}...")
                    # The reference changed, so the value recorded for this
                    # window position is measured against the new reference.
                    window_ssim = compute_ssim(prev_gray_frame, gray_future_frame)
                future_ssims.append(window_ssim)

            if jumped_disturbance:
                # Don't need to commit anything then.
                continue

            # Isn't a temporary disturbance: commit this frame.
            print(f"Commiting new frame {frame_index} that had {similarity} similarity with prev_frame {prev_frame_index} and had no abrupt fall in SSIM in the next 20 in sliding window")
            if debug:
                cv2.imwrite(os.path.join(output_folder, f"_{frame_index}.jpg"), current_frame)
            key_frames.append((current_frame, frame_index, current_timestamp))
            prev_frame_index = frame_index
            prev_gray_frame = current_gray_frame

    # Frames were collected last-to-first; hand them back chronologically.
    return key_frames[::-1]
# Usage: extract the key frames of a video, stamp each one with its
# timestamp and bundle them into a single PDF.
video_path = "tests/test-2.mp4"  # path of the video to process
output_folder = "key_frames"
key_frames = extract_key_frames(video_path, output_folder)
images = []
# Start from an empty output folder; Image.save needs the directory to exist.
if os.path.exists(output_folder):
    shutil.rmtree(output_folder, ignore_errors=True)
os.makedirs(output_folder, exist_ok=True)
font = ImageFont.load_default()  # loop-invariant, so load it once
for frame, frame_index, timestamp in key_frames:
    # OpenCV frames are BGR; Pillow expects RGB.
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(img)
    draw.text((10, 10), f"Time: {timestamp:.2f}s", font=font, fill=(255, 255, 255))
    img.save(os.path.join(output_folder, f"_{frame_index}.jpg"))
    images.append(img)
if images:
    images[0].save("output.pdf", save_all=True, append_images=images[1:])
else:
    # Nothing was extracted (e.g. the video could not be read); indexing
    # images[0] would raise IndexError.
    print(f"No key frames extracted from {video_path}; output.pdf was not written")
# The per-frame JPEGs are only intermediates; remove them again.
shutil.rmtree(output_folder, ignore_errors=True)
Copy link

theSoberSobber commented Oct 5, 2024

yt-dlp <url> -o tests/test-1.mp4

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment