Skip to content

Instantly share code, notes, and snippets.

@johannes-riecken
Last active March 9, 2025 17:25
Show Gist options
  • Save johannes-riecken/ffec61ddab38143ea2f6a950a2a42b9a to your computer and use it in GitHub Desktop.
Save johannes-riecken/ffec61ddab38143ea2f6a950a2a42b9a to your computer and use it in GitHub Desktop.
Extracting slides from "Inheritance is the base class of evil"
# First run `ffmpeg -i movie.mp4 -vf crop=953:704:162:11 cropped.mp4`
# Then run `ffmpeg -i cropped.mp4 'frames/%05d.png'` and run `python3 check_gray.py frames/*.png`.
# The script deletes frames where the upper left corner 10x10 box is neither white nor dark gray.
from PIL import Image
import numpy as np
import sys
import os
# Reference corner colours of frames that should be kept, as RGB triples.
DARK_GRAY_REF = np.array([72, 88, 88])  # Hex #485858
WHITE_REF = np.array([248, 248, 248])
# Euclidean RGB distance below which a colour counts as an approximate match.
THRESHOLD = 20

for f in sys.argv[1:]:
    # The context manager closes the file even if decoding fails, and makes
    # sure it is closed before os.remove() is attempted below.
    with Image.open(f) as image:
        # Crop the 10x10 region from the top-left (0,0) and normalise it to
        # RGB, so grayscale, palette and RGBA frames all yield three channels.
        corner = np.array(image.crop((0, 0, 10, 10)).convert("RGB"))

    # Average colour of the region, truncated to integer RGB values.
    avg_color = corner.mean(axis=(0, 1)).astype(int)

    # Euclidean distance to each reference colour.
    distance = np.sqrt(((avg_color - DARK_GRAY_REF) ** 2).sum())
    distance_white = np.sqrt(((avg_color - WHITE_REF) ** 2).sum())

    # Delete the frame when its corner matches neither reference colour.
    if distance >= THRESHOLD and distance_white >= THRESHOLD:
        os.remove(f)
# Removes frames that are too similar to the most recently kept frame. Run as `python3 remove_duplicate_frames.py`; it processes the `frames` directory.
import cv2
import os
import numpy as np
def are_frames_similar(frame1, frame2, threshold=0.99):
    """
    Compares two frames using template matching and returns True if they are similar.

    Frames whose shapes differ are never considered similar: template
    matching would otherwise slide the smaller frame over the larger one
    and report the best partial match (or raise when the second frame is
    the larger of the two).

    Args:
        frame1: The first frame (numpy array).
        frame2: The second frame (numpy array), used as the template.
        threshold: The similarity threshold (correlation value).

    Returns:
        True if the frames are similar, False otherwise.
    """
    if frame1.shape != frame2.shape:
        return False
    # With equally sized inputs the result map holds a single normalised
    # cross-correlation value; minMaxLoc's second return value is its maximum.
    result = cv2.matchTemplate(frame1, frame2, cv2.TM_CCORR_NORMED)
    _, correlation, _, _ = cv2.minMaxLoc(result)
    return correlation >= threshold
def remove_duplicate_frames(directory):
    """
    Deletes PNG frames that are near-duplicates of the last frame that was kept.

    Frames are visited in sorted filename order. Each readable frame is
    compared with the most recently kept frame; similar ones are queued for
    deletion, dissimilar ones become the new reference. Unreadable files are
    reported and skipped. Deletion happens after the whole scan.

    Args:
        directory: The directory containing the PNG frames.
    """
    png_names = [entry for entry in os.listdir(directory) if entry.endswith(".png")]
    png_names.sort()
    if len(png_names) == 0:
        print(f"No PNG frames found in directory: {directory}")
        return

    last_kept = None
    duplicates = []
    for frame_file in png_names:
        image = cv2.imread(os.path.join(directory, frame_file))
        if image is None:
            print(f"Error: Could not read frame {frame_file}")
            continue
        if last_kept is not None and are_frames_similar(last_kept, image):
            duplicates.append(frame_file)
            continue
        # First readable frame, or one that differs from the reference:
        # it becomes the frame later ones are compared against.
        last_kept = image

    for frame_file in duplicates:
        os.remove(os.path.join(directory, frame_file))
        print(f"Removed duplicate frame: {frame_file}")
if __name__ == "__main__":
    # Script entry point: de-duplicate the frames in the default folder.
    # Change the path below to point at your own frames directory.
    frames_dir = "frames"
    remove_duplicate_frames(frames_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment