yanndebray · September 3, 2025 16:15
diff --git a/extract_slides.py b/extract_slides.py
 #!/usr/bin/env python3
 """
 Extract distinct slides from a presentation video.

 Usage:
  python extract_slides.py --video input.mp4 --out slides/ --interval 10 --threshold 6

 Args:
  --video      Path to the input video file.
  --out        Output folder for saved images (created if missing).
  --interval   Seconds between samples (default: 10).
  --threshold  Minimum Hamming distance (dHash) to consider a "new" slide (default: 6).
               Lower = more sensitive (more images); higher = less sensitive (fewer images).
 """
 import os
 import cv2
 import math
 import argparse
 from pathlib import Path

 def dhash(gray_img):
    """
    Return the difference hash (dHash) of a grayscale image as a 64-bit int.

    The image is shrunk to 9 columns by 8 rows and lightly blurred; each bit
    records whether a pixel is brighter than its left-hand neighbour. Bits are
    packed row by row, most significant bit first.
    """
    # cv2.resize takes (width, height): the result has 8 rows of 9 pixels.
    # The light blur suppresses tiny pixel-level noise before comparing.
    shrunk = cv2.GaussianBlur(
        cv2.resize(gray_img, (9, 8), interpolation=cv2.INTER_AREA), (3, 3), 0
    )

    # Row-major sequence of 64 flags: True where a pixel exceeds its left neighbour.
    brighter = (shrunk[:, 1:] > shrunk[:, :-1]).ravel().tolist()

    # Fold the flags into one integer, first flag ending up in the top bit.
    value = 0
    for flag in brighter:
        value = value * 2 + int(flag)
    return value

 def hamming_distance(a, b):
    """
    Count the bit positions in which two integer hashes differ.

    Args:
        a: First hash, as an int.
        b: Second hash, as an int.

    Returns:
        The number of set bits in ``a ^ b``.

    Raises:
        ValueError: if ``a ^ b`` is negative (the operands have opposite
            signs), since a negative Python int has no finite bit count.
    """
    x = a ^ b
    if x < 0:
        # Clearing the lowest set bit (x &= x - 1) never reaches 0 for a
        # negative Python int, so reject it rather than loop forever.
        raise ValueError("hamming_distance requires hashes of the same sign")
    # bin() renders x in base 2; counting its "1" characters is a popcount
    # that works on every Python 3 version (int.bit_count needs 3.10+).
    return bin(x).count("1")

 def grab_frame_at(cap, t_seconds):
    """
    Position the capture at t_seconds and read a single frame.

    The timestamp is converted to milliseconds for CAP_PROP_POS_MSEC.
    Returns the frame produced by cap.read(), or None when the read fails.
    """
    position_ms = t_seconds * 1000.0
    cap.set(cv2.CAP_PROP_POS_MSEC, position_ms)
    success, image = cap.read()
    return image if success else None

 def main():
    """
    Command-line entry point.

    Samples the video every --interval seconds and saves each sampled frame
    whose dHash differs from the last *saved* frame by at least --threshold
    bits. The first readable frame is always saved.

    Raises:
        RuntimeError: if the video cannot be opened or an image cannot be
            written.
        SystemExit: raised by argparse on invalid arguments, including a
            non-positive --interval.
    """
    ap = argparse.ArgumentParser(description="Extract distinct slides from a presentation video.")
    ap.add_argument("--video", required=True, help="Path to input video")
    ap.add_argument("--out", required=True, help="Output directory for images")
    ap.add_argument("--interval", type=float, default=10.0, help="Sampling interval in seconds (default: 10)")
    ap.add_argument("--threshold", type=int, default=6, help="Min Hamming distance to treat as new slide (default: 6)")
    ap.add_argument("--prefix", default="slide", help="Filename prefix (default: slide)")
    args = ap.parse_args()

    # A zero, negative or non-finite interval never moves the sampling time
    # past the end of the video, so reject it instead of looping forever.
    if not (math.isfinite(args.interval) and args.interval > 0):
        ap.error("--interval must be a positive number of seconds")

    Path(args.out).mkdir(parents=True, exist_ok=True)

    cap = cv2.VideoCapture(args.video)
    if not cap.isOpened():
        raise RuntimeError(f"Failed to open video: {args.video}")

    # When the duration is unknown there is no expected end to compare with,
    # so stop after this many failed reads in a row.
    max_failed_reads = 3

    try:
        fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0

        # Trust the duration only when both properties are usable; a guessed
        # FPS or a NaN/inf value would give a bogus end time.
        duration = None
        if fps > 0 and frame_count > 0:
            estimate = frame_count / fps
            if math.isfinite(estimate) and estimate > 0:
                duration = estimate

        if duration is not None:
            print(f"[i] FPS: {fps:.3f}, frames: {int(frame_count)}, duration: {duration:.2f}s")
        else:
            print("[i] Duration unknown")

        end_time = duration if duration is not None else float("inf")
        prev_hash = None
        saved = 0
        failed_reads = 0
        sample_index = 0

        while True:
            # Derive t from the sample index instead of accumulating, so
            # fractional intervals do not drift.
            t = sample_index * args.interval
            if t > end_time:
                break
            sample_index += 1

            frame = grab_frame_at(cap, t)
            if frame is None:
                if duration is not None:
                    # A failed read near the expected end means we are done;
                    # elsewhere, skip the unreadable sample and move on.
                    if t > duration - args.interval:
                        break
                else:
                    failed_reads += 1
                    if failed_reads >= max_failed_reads:
                        break
                continue
            failed_reads = 0

            # Convert to grayscale for hashing
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            cur_hash = dhash(gray)

            # Compare against the last saved slide, not the previous sample.
            is_new = prev_hash is None or hamming_distance(cur_hash, prev_hash) >= args.threshold
            if is_new:
                # NOTE: int(t) truncates, so with sub-second intervals two
                # samples can share a label and the later file overwrites
                # the earlier one.
                ts_label = f"{int(t):04d}s"
                out_path = os.path.join(args.out, f"{args.prefix}_{ts_label}.jpg")
                # High quality JPEG; imwrite reports failure via its return value.
                if not cv2.imwrite(out_path, frame, [int(cv2.IMWRITE_JPEG_QUALITY), 95]):
                    raise RuntimeError(f"Failed to write image: {out_path}")
                saved += 1
                prev_hash = cur_hash
                print(f"[+] Saved {out_path}")
            else:
                print(f"[-] Skipped at {t:.1f}s (no slide change)")
    finally:
        # Release the capture even if hashing or writing raised.
        cap.release()

    print(f"[✓] Done. Saved {saved} slide(s) to {args.out}")

 # Run the command-line tool only when this file is executed as a script,
 # not when it is imported as a module.
 if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	"""
	Extract distinct slides from a presentation video.

	Usage:
	python extract_slides.py --video input.mp4 --out slides/ --interval 10 --threshold 6

	Args:
	--video Path to the input video file.
	--out Output folder for saved images (created if missing).
	--interval Seconds between samples (default: 10).
	--threshold Minimum Hamming distance (dHash) to consider a "new" slide (default: 6).
	Lower = more sensitive (more images); higher = less sensitive (fewer images).
	"""
	import os
	import cv2
	import math
	import argparse
	from pathlib import Path

	def dhash(gray_img):
	    """
	    Compute difference hash (dHash) on a grayscale image.

	    The image is shrunk to 9 columns by 8 rows and lightly blurred; each bit
	    records whether a pixel is brighter than its left-hand neighbour.
	    Returns the 64 bits packed row by row into an int, first bit highest.
	    """
	    # Resize to 9x8 (width x height), blur lightly to reduce tiny noise
	    small = cv2.resize(gray_img, (9, 8), interpolation=cv2.INTER_AREA)
	    small = cv2.GaussianBlur(small, (3, 3), 0)

	    # Compare adjacent pixels horizontally -> 8x8 boolean grid
	    diff = small[:, 1:] > small[:, :-1]
	    # Pack into 64-bit int (plain bitwise OR; no backslash escape)
	    h = 0
	    for bit in diff.flatten():
	        h = (h << 1) | int(bit)
	    return h

	def hamming_distance(a, b):
	    """
	    Count the bit positions in which two integer hashes differ.

	    Args:
	        a: First hash, as an int.
	        b: Second hash, as an int.

	    Returns:
	        The number of set bits in ``a ^ b``.

	    Raises:
	        ValueError: if ``a ^ b`` is negative (the operands have opposite
	            signs), since a negative Python int has no finite bit count.
	    """
	    x = a ^ b
	    if x < 0:
	        # Clearing the lowest set bit (x &= x - 1) never reaches 0 for a
	        # negative Python int, so reject it rather than loop forever.
	        raise ValueError("hamming_distance requires hashes of the same sign")
	    # bin() renders x in base 2; counting its "1" characters is a popcount
	    # that works on every Python 3 version (int.bit_count needs 3.10+).
	    return bin(x).count("1")

	def grab_frame_at(cap, t_seconds):
	    """
	    Seek to t_seconds (converted to milliseconds) and read one frame.

	    Returns the frame produced by cap.read(), or None when the read fails.
	    """
	    cap.set(cv2.CAP_PROP_POS_MSEC, t_seconds * 1000.0)
	    ok, frame = cap.read()
	    if not ok:
	        return None
	    return frame

	def main():
	    """
	    Command-line entry point.

	    Samples the video every --interval seconds and saves each sampled frame
	    whose dHash differs from the last *saved* frame by at least --threshold
	    bits. The first readable frame is always saved.

	    Raises:
	        RuntimeError: if the video cannot be opened or an image cannot be
	            written.
	        SystemExit: raised by argparse on invalid arguments, including a
	            non-positive --interval.
	    """
	    ap = argparse.ArgumentParser(description="Extract distinct slides from a presentation video.")
	    ap.add_argument("--video", required=True, help="Path to input video")
	    ap.add_argument("--out", required=True, help="Output directory for images")
	    ap.add_argument("--interval", type=float, default=10.0, help="Sampling interval in seconds (default: 10)")
	    ap.add_argument("--threshold", type=int, default=6, help="Min Hamming distance to treat as new slide (default: 6)")
	    ap.add_argument("--prefix", default="slide", help="Filename prefix (default: slide)")
	    args = ap.parse_args()

	    # A zero, negative or non-finite interval never moves the sampling time
	    # past the end of the video, so reject it instead of looping forever.
	    if not (math.isfinite(args.interval) and args.interval > 0):
	        ap.error("--interval must be a positive number of seconds")

	    Path(args.out).mkdir(parents=True, exist_ok=True)

	    cap = cv2.VideoCapture(args.video)
	    if not cap.isOpened():
	        raise RuntimeError(f"Failed to open video: {args.video}")

	    # When the duration is unknown there is no expected end to compare with,
	    # so stop after this many failed reads in a row.
	    max_failed_reads = 3

	    try:
	        fps = cap.get(cv2.CAP_PROP_FPS) or 0.0
	        frame_count = cap.get(cv2.CAP_PROP_FRAME_COUNT) or 0

	        # Trust the duration only when both properties are usable; a guessed
	        # FPS or a NaN/inf value would give a bogus end time.
	        duration = None
	        if fps > 0 and frame_count > 0:
	            estimate = frame_count / fps
	            if math.isfinite(estimate) and estimate > 0:
	                duration = estimate

	        if duration is not None:
	            print(f"[i] FPS: {fps:.3f}, frames: {int(frame_count)}, duration: {duration:.2f}s")
	        else:
	            print("[i] Duration unknown")

	        end_time = duration if duration is not None else float("inf")
	        prev_hash = None
	        saved = 0
	        failed_reads = 0
	        sample_index = 0

	        while True:
	            # Derive t from the sample index instead of accumulating, so
	            # fractional intervals do not drift.
	            t = sample_index * args.interval
	            if t > end_time:
	                break
	            sample_index += 1

	            frame = grab_frame_at(cap, t)
	            if frame is None:
	                if duration is not None:
	                    # A failed read near the expected end means we are done;
	                    # elsewhere, skip the unreadable sample and move on.
	                    if t > duration - args.interval:
	                        break
	                else:
	                    failed_reads += 1
	                    if failed_reads >= max_failed_reads:
	                        break
	                continue
	            failed_reads = 0

	            # Convert to grayscale for hashing
	            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	            cur_hash = dhash(gray)

	            # Compare against the last saved slide, not the previous sample.
	            is_new = prev_hash is None or hamming_distance(cur_hash, prev_hash) >= args.threshold
	            if is_new:
	                # NOTE: int(t) truncates, so with sub-second intervals two
	                # samples can share a label and the later file overwrites
	                # the earlier one.
	                ts_label = f"{int(t):04d}s"
	                out_path = os.path.join(args.out, f"{args.prefix}_{ts_label}.jpg")
	                # High quality JPEG; imwrite reports failure via its return value.
	                if not cv2.imwrite(out_path, frame, [int(cv2.IMWRITE_JPEG_QUALITY), 95]):
	                    raise RuntimeError(f"Failed to write image: {out_path}")
	                saved += 1
	                prev_hash = cur_hash
	                print(f"[+] Saved {out_path}")
	            else:
	                print(f"[-] Skipped at {t:.1f}s (no slide change)")
	    finally:
	        # Release the capture even if hashing or writing raised.
	        cap.release()

	    print(f"[✓] Done. Saved {saved} slide(s) to {args.out}")

	# Run the command-line tool only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()