Skip to content

Instantly share code, notes, and snippets.

@yanndebray
Created September 3, 2025 16:15
Show Gist options
  • Save yanndebray/8a05b2ce0ed5e427070c579282ed2672 to your computer and use it in GitHub Desktop.
Save yanndebray/8a05b2ce0ed5e427070c579282ed2672 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Extract distinct slides from a presentation video.
Usage:
python extract_slides.py --video input.mp4 --out slides/ --interval 10 --threshold 6
Args:
--video Path to the input video file.
--out Output folder for saved images (created if missing).
--interval Seconds between samples (default: 10).
--threshold Minimum Hamming distance (dHash) to consider a "new" slide (default: 6).
Lower = more sensitive (more images saved); higher = stricter (fewer images saved).
"""
import os
import cv2
import math
import argparse
from pathlib import Path
def dhash(gray_img):
    """
    Return the 64-bit difference hash (dHash) of a grayscale image as an int.

    The image is shrunk to 9 columns by 8 rows and lightly blurred; each of
    the 8x8 horizontally adjacent pixel pairs then contributes one bit, set
    when the right-hand pixel is brighter than its left neighbour. Bits are
    packed in row-major order, most significant bit first.
    """
    # dsize is (width, height): 9 columns give 8 neighbour pairs per row.
    shrunk = cv2.GaussianBlur(
        cv2.resize(gray_img, (9, 8), interpolation=cv2.INTER_AREA),
        (3, 3),
        0,
    )
    brighter_right = shrunk[:, 1:] > shrunk[:, :-1]
    # Spell the grid out as a binary literal and let int() do the packing.
    bit_string = "".join("1" if flag else "0" for flag in brighter_right.ravel())
    return int(bit_string, 2)
def hamming_distance(a, b):
    """
    Count the bit positions at which two non-negative integer hashes differ.

    Args:
        a: First hash, a non-negative int.
        b: Second hash, a non-negative int.

    Returns:
        The number of set bits in ``a ^ b``.

    Raises:
        ValueError: If either value is negative. Python ints are unbounded,
            so a negative value has no finite bit pattern to compare.
    """
    if a < 0 or b < 0:
        raise ValueError("hamming_distance requires non-negative integers")
    # bin() + count is the portable popcount (int.bit_count needs Python 3.10+).
    return bin(a ^ b).count("1")
def grab_frame_at(cap, t_seconds):
    """
    Position the capture at t_seconds and read one frame.

    The offset is converted to milliseconds and applied through
    CAP_PROP_POS_MSEC before reading. Returns the frame handed back by
    cap.read(), or None when the read reports failure.
    """
    position_ms = t_seconds * 1000.0
    cap.set(cv2.CAP_PROP_POS_MSEC, position_ms)
    success, image = cap.read()
    return image if success else None
def main():
    """
    Command-line entry point: sample the video at a fixed interval and save
    every frame whose dHash differs enough from the last saved frame.

    Reads --video, --out, --interval, --threshold and --prefix from the
    command line, creates the output folder if needed, and writes JPEG files
    named ``<prefix>_<timestamp>s.jpg``.

    Raises:
        RuntimeError: If the video cannot be opened.
        SystemExit: Raised by argparse on invalid arguments, including a
            non-positive or non-finite --interval.
    """
    ap = argparse.ArgumentParser(description="Extract distinct slides from a presentation video.")
    ap.add_argument("--video", required=True, help="Path to input video")
    ap.add_argument("--out", required=True, help="Output directory for images")
    ap.add_argument("--interval", type=float, default=10.0, help="Sampling interval in seconds (default: 10)")
    ap.add_argument("--threshold", type=int, default=6, help="Min Hamming distance to treat as new slide (default: 6)")
    ap.add_argument("--prefix", default="slide", help="Filename prefix (default: slide)")
    args = ap.parse_args()

    # A zero, negative or non-finite interval would never advance past the
    # end of the video, so reject it before touching the filesystem.
    if not (math.isfinite(args.interval) and args.interval > 0):
        ap.error("--interval must be a positive, finite number of seconds")

    Path(args.out).mkdir(parents=True, exist_ok=True)

    # With no usable duration, a run of failed reads is the only
    # end-of-stream signal available; tolerate a few before giving up.
    max_misses = 3
    saved = 0

    cap = cv2.VideoCapture(args.video)
    try:
        if not cap.isOpened():
            raise RuntimeError(f"Failed to open video: {args.video}")

        fps = cap.get(cv2.CAP_PROP_FPS) or 10.0
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0
        duration = frame_count / fps if frame_count > 0 else None
        # Treat NaN, infinite or non-positive results as "unknown" so the
        # end-of-video checks below never compare against a bogus value.
        if duration is not None and not (math.isfinite(duration) and duration > 0):
            duration = None

        if duration is not None:
            print(f"[i] FPS: {fps:.3f}, frames: {int(frame_count)}, duration: {duration:.2f}s")
        else:
            print("[i] Duration unknown")

        end_time = duration if duration is not None else float("inf")
        prev_hash = None
        misses = 0
        index = 0
        while True:
            # Derive t from the sample index rather than accumulating
            # `t += interval`, which drifts for fractional intervals.
            t = index * args.interval
            if t > end_time:
                break
            index += 1

            frame = grab_frame_at(cap, t)
            if frame is None:
                misses += 1
                if duration is not None:
                    # A failed read inside the last interval means we hit the end.
                    if t > duration - args.interval:
                        break
                elif misses >= max_misses:
                    break
                # Otherwise skip this timestamp and try the next one.
                continue
            misses = 0

            # Convert to grayscale for hashing
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            cur_hash = dhash(gray)
            is_new = prev_hash is None or hamming_distance(cur_hash, prev_hash) >= args.threshold
            if not is_new:
                print(f"[-] Skipped at {t:.1f}s (no slide change)")
                continue

            # Whole-second labels collide when sampling faster than 1 Hz,
            # which would silently overwrite files; use millisecond labels there.
            if args.interval >= 1:
                ts_label = f"{int(t):04d}s"
            else:
                ts_label = f"{t:08.3f}s"
            out_path = os.path.join(args.out, f"{args.prefix}_{ts_label}.jpg")
            # High quality JPEG. imwrite reports failure via its return value.
            if not cv2.imwrite(out_path, frame, [int(cv2.IMWRITE_JPEG_QUALITY), 95]):
                # Leave prev_hash untouched so the slide is retried next sample.
                print(f"[!] Failed to write {out_path}")
                continue
            saved += 1
            prev_hash = cur_hash
            print(f"[+] Saved {out_path}")
    finally:
        # Release the capture even when an error interrupts processing.
        cap.release()

    print(f"[✓] Done. Saved {saved} slide(s) to {args.out}")
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment