Written for this video: https://www.youtube.com/watch?v=9X1rSWLFhLY&list=PL9FuOtXibFjV77w2eyil4Xzp8eooqsPp8
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import shutil

debug = False
# UI using ShadCN, fast iteration; learn how to do streaming UIs though
# prompt the user to decide thresholds by showing two images and asking whether they look similar -- that way they get a good product too!
# stream logs to the frontend
# also add a tqdm -- since we're iterating in reverse anyway, keep a progress percentage and stream that to the frontend
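# A minimal sketch of the progress idea above (hedged: tqdm is an assumed extra
# dependency that the rest of this script does not use, and the helper name is
# hypothetical). The percentage tqdm maintains could later be streamed to a
# frontend instead of printed to the terminal.
def iter_frames_with_progress(n_frames):
    """Yield frame indices in reverse order while showing a tqdm progress bar."""
    from tqdm import tqdm  # local import so the script still runs without tqdm
    yield from tqdm(range(n_frames - 1, -1, -1), desc="key frames", total=n_frames)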
def compute_ssim(img1, img2):
    """Compute the structural similarity index between two grayscale images."""
    # Standard SSIM constants for 8-bit images: C1 = (K1*L)^2, C2 = (K2*L)^2 with L = 255.
    c1 = (0.01 * 255)**2
    c2 = (0.03 * 255)**2
    img1 = img1.astype(np.float64)
    img2 = img2.astype(np.float64)
    # 11x11 Gaussian window with sigma = 1.5, as in the original SSIM formulation.
    kernel = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(kernel, kernel.transpose())
    # Local means; crop the 5-pixel border where the window is not fully valid.
    mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]
    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
    mu1_sq = mu1**2
    mu2_sq = mu2**2
    mu1_mu2 = mu1 * mu2
    # Local variances and covariance.
    sigma1_sq = cv2.filter2D(img1**2, -1, window)[5:-5, 5:-5] - mu1_sq
    sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
    sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
    ssim_map = ((2 * mu1_mu2 + c1) * (2 * sigma12 + c2)) / ((mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2))
    return ssim_map.mean()
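# Quick, self-contained sanity check for compute_ssim (illustrative only, gated
# behind the existing debug flag; the random images are synthetic stand-ins for
# real frames). An image against itself scores 1.0; unrelated noise scores near 0.
if debug:
    rng = np.random.default_rng(0)
    a = rng.integers(0, 256, (120, 160), dtype=np.uint8)
    b = rng.integers(0, 256, (120, 160), dtype=np.uint8)
    print(compute_ssim(a, a))  # 1.0 (identical images)
    print(compute_ssim(a, b))  # much lower for unrelated images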
def extract_key_frames(video_path, output_folder, similarity_threshold=0.6, time_interval=1):
    """Extract key frames from the video, sampling one frame every `time_interval` seconds."""
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_interval = max(int(fps * time_interval), 1)  # guard: fps probing can return 0
    frame_count = 0
    frames = []
    while True:
        ret, frame = video.read()
        if not ret:
            break
        if frame_count % frame_interval == 0:
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frames.append((frame, gray_frame, frame_count / fps))
        frame_count += 1
    video.release()
    frame_count = len(frames)
    sliding_window_length = 20
    # fall_threshold / normalizer_fall_threshold (here 30/100 = 0.3) is the drop in SSIM
    # between consecutive window frames that counts as an abrupt fall; decreasing it
    # makes disturbance jumps more probable.
    fall_threshold = 30
    normalizer_fall_threshold = 100
    prev_gray_frame = None
    prev_frame = None
    prev_frame_index = None
    prev_timestamp = None
    key_frames = []
    # Iterate in reverse so each committed frame is the last (most complete) view of a
    # slide. A while loop is used instead of `for frame_index in range(...)` because the
    # disturbance jump below reassigns frame_index, which would have no effect on a for
    # loop's iteration.
    frame_index = frame_count - 1
    while frame_index >= 0:
        current_frame, current_gray_frame, current_timestamp = frames[frame_index]
        if prev_frame is None:
            # Always commit the last sampled frame as the first key frame.
            key_frames.append((current_frame, frame_index, current_timestamp))
            if debug: cv2.imwrite(os.path.join(output_folder, f"_{frame_index}.jpg"), current_frame)
            print(f"Committing new frame {frame_index} that had DUMMY similarity with prev_frame {prev_frame_index} and had no abrupt fall in SSIM in the next {sliding_window_length} in sliding window")
            prev_frame = current_frame
            prev_gray_frame = current_gray_frame
            prev_frame_index = frame_index
            prev_timestamp = current_timestamp
            frame_index -= 1
            continue
        # This frame might be a key frame -- unless the dissimilarity comes from a
        # transient disturbance (e.g. a hand over the slide), which we detect with a
        # sliding window over the next (earlier-in-time) frames.
        current_similarity = compute_ssim(current_gray_frame, prev_gray_frame)
        if current_similarity < similarity_threshold:
            future_ssims = []
            jumped_disturbance = False
            for future_frame_index in range(frame_index, max(frame_index - sliding_window_length, 0), -1):
                future_frame, gray_future_frame, future_timestamp = frames[future_frame_index]
                future_ssim = compute_ssim(prev_gray_frame, gray_future_frame)
                if future_ssims and future_ssims[-1] - future_ssim >= fall_threshold / normalizer_fall_threshold:
                    # Abrupt fall in SSIM: treat the intervening frames as a disturbance
                    # and jump straight past it.
                    print(f"Jumped disturbance from {frame_index} to {future_frame_index}...")
                    prev_frame = future_frame
                    prev_gray_frame = gray_future_frame
                    prev_frame_index = future_frame_index
                    prev_timestamp = future_timestamp
                    frame_index = future_frame_index
                    jumped_disturbance = True
                    break
                future_ssims.append(future_ssim)
            if jumped_disturbance:
                # Nothing to commit; resume scanning just past the disturbance.
                frame_index -= 1
                continue
            # Not a temporary disturbance: commit this frame.
            print(f"Committing new frame {frame_index} that had {current_similarity} similarity with prev_frame {prev_frame_index} and had no abrupt fall in SSIM in the next {sliding_window_length} in sliding window")
            if debug: cv2.imwrite(os.path.join(output_folder, f"_{frame_index}.jpg"), current_frame)
            key_frames.append((current_frame, frame_index, current_timestamp))
            prev_frame = current_frame
            prev_frame_index = frame_index
            prev_gray_frame = current_gray_frame
            prev_timestamp = current_timestamp
        frame_index -= 1
    # Collected newest-first; return in chronological order.
    return key_frames[::-1]
# Usage
video_path = "tests/test-2.mp4"
output_folder = "key_frames"

# Create a clean output folder *before* extraction so the debug writes inside
# extract_key_frames have somewhere to land.
if os.path.exists(output_folder):
    shutil.rmtree(output_folder, ignore_errors=True)
os.makedirs(output_folder)

key_frames = extract_key_frames(video_path, output_folder)

images = []
for frame, frame_index, timestamp in key_frames:
    # OpenCV frames are BGR; convert to RGB for PIL.
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()
    draw.text((10, 10), f"Time: {timestamp:.2f}s", font=font, fill=(255, 255, 255))
    img.save(os.path.join(output_folder, f"_{frame_index}.jpg"))
    images.append(img)

# Bundle all key frames into a single PDF (guard against an empty result).
if images:
    images[0].save("output.pdf", save_all=True, append_images=images[1:])
shutil.rmtree(output_folder, ignore_errors=True)
https://pypi.org/project/yt-dlp/ <- for downloading the source video
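A minimal sketch of fetching the video with yt-dlp's Python API (assumptions: yt-dlp is installed via pip, the output path matches the video_path used in the script, and the format selector is kept deliberately simple):

from yt_dlp import YoutubeDL

# Sketch only: download the lecture video to where extract_key_frames expects it.
opts = {
    "format": "mp4",                # prefer an mp4 stream
    "outtmpl": "tests/test-2.mp4",  # matches video_path in the script above
}
with YoutubeDL(opts) as ydl:
    ydl.download(["https://www.youtube.com/watch?v=9X1rSWLFhLY"])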