import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import shutil
debug = False
# UI using ShadCN, fast iteration; learn how to do streaming UIs though
# prompt the user to decide thresholds by showing two images and asking whether they look similar; that way they get a good result too!
# stream logs to the frontend
# also a tqdm progress bar: since we iterate in reverse anyway, we already have a progress percentage and can stream that to the frontend (see the sketch after extract_key_frames below)
def compute_ssim(img1, img2):
    """Compute the structural similarity index between two grayscale images."""
    # Stabilizing constants from the standard SSIM formulation for 8-bit images.
    c1 = (0.01 * 255) ** 2
    c2 = (0.03 * 255) ** 2
    img1 = img1.astype(np.float64)
    img2 = img2.astype(np.float64)
    # 11x11 Gaussian window with sigma 1.5; crop a 5-pixel border after filtering.
    kernel = cv2.getGaussianKernel(11, 1.5)
    window = np.outer(kernel, kernel.transpose())
    mu1 = cv2.filter2D(img1, -1, window)[5:-5, 5:-5]
    mu2 = cv2.filter2D(img2, -1, window)[5:-5, 5:-5]
    mu1_sq = mu1 ** 2
    mu2_sq = mu2 ** 2
    mu1_mu2 = mu1 * mu2
    sigma1_sq = cv2.filter2D(img1 ** 2, -1, window)[5:-5, 5:-5] - mu1_sq
    sigma2_sq = cv2.filter2D(img2 ** 2, -1, window)[5:-5, 5:-5] - mu2_sq
    sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
    ssim_map = ((2 * mu1_mu2 + c1) * (2 * sigma12 + c2)) / ((mu1_sq + mu2_sq + c1) * (sigma1_sq + sigma2_sq + c2))
    return ssim_map.mean()
def extract_key_frames(video_path, output_folder, similarity_threshold=0.6, time_interval=1):
    """Extract key frames from the video, sampling one frame every `time_interval` seconds."""
    video = cv2.VideoCapture(video_path)
    fps = video.get(cv2.CAP_PROP_FPS)
    frame_interval = int(fps * time_interval)
    frame_count = 0
    frames = []
    while True:
        ret, frame = video.read()
        if not ret:
            break
        if frame_count % frame_interval == 0:
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frames.append((frame, gray_frame, frame_count / fps))
        frame_count += 1
    video.release()
    frame_count = len(frames)
    sliding_window_length = 20
    fall_threshold = 30  # decreasing this makes disturbance skips more probable
    normalizer_fall_threshold = 100  # effective SSIM-drop threshold = fall_threshold / normalizer_fall_threshold = 0.3
    prev_gray_frame = None
    prev_frame = None
    prev_frame_index = None
    prev_timestamp = None
    key_frames = []
    # Walk the sampled frames in reverse so each committed frame is the last (most complete)
    # view of its scene; a while loop lets us actually jump past detected disturbances.
    frame_index = frame_count - 1
    while frame_index >= 0:
        current_frame, current_gray_frame, current_timestamp = frames[frame_index]
        if prev_frame is None:
            key_frames.append((current_frame, frame_index, current_timestamp))
            if debug: cv2.imwrite(os.path.join(output_folder, f"_{frame_index}.jpg"), current_frame)
            print(f"Committing new frame {frame_index} (last sampled frame, no previous frame to compare against)")
            prev_frame = current_frame
            prev_gray_frame = current_gray_frame
            prev_frame_index = frame_index
            prev_timestamp = current_timestamp
            frame_index -= 1
            continue
        # This could be a key frame, unless it is a temporary disturbance (e.g. a hand
        # covering the slide), which we check with a sliding window over the next frames.
        similarity = compute_ssim(current_gray_frame, prev_gray_frame)
        if similarity < similarity_threshold:
            future_ssims = []
            jumped_disturbance = False
            for future_frame_index in range(frame_index, max(frame_index - sliding_window_length, 0), -1):
                future_frame, gray_future_frame, future_timestamp = frames[future_frame_index]
                future_ssim = compute_ssim(prev_gray_frame, gray_future_frame)
                if future_ssims and future_ssims[-1] - future_ssim >= (fall_threshold / normalizer_fall_threshold):
                    # Abrupt fall in SSIM inside the window: treat it as a disturbance and jump past it.
                    print(f"Jumped disturbance from {frame_index} to {future_frame_index}...")
                    prev_frame = future_frame
                    prev_gray_frame = gray_future_frame
                    prev_frame_index = future_frame_index
                    prev_timestamp = future_timestamp
                    frame_index = future_frame_index
                    jumped_disturbance = True
                    break
                future_ssims.append(future_ssim)
            if jumped_disturbance:
                # Nothing to commit; resume scanning just past the jump target.
                frame_index -= 1
                continue
            # Not a temporary disturbance: commit this frame.
            print(f"Committing new frame {frame_index} that had {similarity} similarity with prev_frame {prev_frame_index} and no abrupt fall in SSIM within the {sliding_window_length}-frame sliding window")
            if debug: cv2.imwrite(os.path.join(output_folder, f"_{frame_index}.jpg"), current_frame)
            key_frames.append((current_frame, frame_index, current_timestamp))
            prev_frame = current_frame
            prev_frame_index = frame_index
            prev_gray_frame = current_gray_frame
            prev_timestamp = current_timestamp
        frame_index -= 1
    return key_frames[::-1]
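# Sketch (not part of the original gist): the notes at the top mention streaming a
# progress percentage, and the reverse walk over the sampled frames gives a natural total.
# Assuming tqdm is installed (an extra dependency), a manually-updated bar like the one
# below could be created at the start of extract_key_frames and bumped once per loop
# iteration, with bar.n / bar.total streamed to the frontend; it is illustrative only
# and is not wired into the function above.
def make_progress_bar(total_frames, desc="scanning frames"):
    """Return a tqdm bar; call .update(1) per processed frame and read .n / .total for a percentage."""
    from tqdm import tqdm  # assumed dependency, not used elsewhere in this gist
    return tqdm(total=total_frames, desc=desc)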
# Usage
video_path = "tests/test-2.mp4"  # input video to process
output_folder = "key_frames"
# Create a clean output folder up front so debug frames (and the key frames below) have somewhere to go.
if os.path.exists(output_folder):
    shutil.rmtree(output_folder, ignore_errors=True)
os.makedirs(output_folder)
key_frames = extract_key_frames(video_path, output_folder)
images = []
for frame, frame_index, timestamp in key_frames:
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    draw = ImageDraw.Draw(img)
    font = ImageFont.load_default()
    draw.text((10, 10), f"Time: {timestamp:.2f}s", font=font, fill=(255, 255, 255))
    img.save(os.path.join(output_folder, f"_{frame_index}.jpg"))
    images.append(img)
# Bundle the annotated key frames into a single PDF, then clean up the temporary folder.
if images:
    images[0].save("output.pdf", save_all=True, append_images=images[1:])
shutil.rmtree(output_folder, ignore_errors=True)
theSoberSobber commented Oct 5, 2024

https://pypi.org/project/yt-dlp/

yt-dlp <url> -o tests/test-1.mp4
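
A minimal sketch of driving the download and extraction from Python (untested; assumes yt-dlp is on PATH, and the URL below is just a placeholder):

import subprocess

url = "https://example.com/some-lecture"  # placeholder, not a real test URL
subprocess.run(["yt-dlp", url, "-o", "tests/test-1.mp4"], check=True)
# then point video_path in the gist at tests/test-1.mp4 and run it to get output.pdf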
