Skip to content

Instantly share code, notes, and snippets.

@junjuew
Last active November 3, 2020 18:45
Show Gist options
  • Save junjuew/c9e5a63d5f1529dc83e94deade3a6ef5 to your computer and use it in GitHub Desktop.
Save junjuew/c9e5a63d5f1529dc83e94deade3a6ef5 to your computer and use it in GitHub Desktop.
Deduplicate Images Based on Their Perceptual Hash
import pathlib
import cv2 as cv
import fire
import imagehash
from PIL import Image
def dedup(video_uri: str, output_dir: str, min_hash_difference_between_key_frames: float = 10):
    """Save the visually distinct key frames of a video as JPEG files.

    Frames are read sequentially. The first frame is always kept; every later
    frame is kept only if its perceptual hash (``imagehash.phash``) differs
    from the hash of the most recently kept frame by at least
    ``min_hash_difference_between_key_frames``. Each kept frame is written to
    ``output_dir`` as ``<frame index zero-padded to 5 digits>.jpg``.

    Args:
        video_uri: Video source, passed unchanged to ``cv.VideoCapture``.
        output_dir: Directory that receives the key frames. It is created,
            including missing parent directories, if it does not exist.
        min_hash_difference_between_key_frames: Minimum hash difference
            (``image_hash - base_image_hash``; per the ImageHash docs this is
            the Hamming distance between the two hashes) for a frame to count
            as a new key frame.

    Raises:
        ValueError: If the video source cannot be opened.
    """
    cap = cv.VideoCapture(video_uri)
    if not cap.isOpened():
        raise ValueError(f"Cannot open {video_uri}")

    try:
        out_path = pathlib.Path(output_dir)
        out_path.mkdir(parents=True, exist_ok=True)

        base_image_hash = None  # hash of the last frame that was saved
        idx = 0  # index of the current frame within the video
        while True:
            valid, frame = cap.read()
            if not valid:
                # No more frames (or a read failure): stop processing.
                break

            # OpenCV yields BGR arrays; PIL expects RGB channel order.
            image = Image.fromarray(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
            image_hash = imagehash.phash(image)

            is_key_frame = base_image_hash is None or (
                image_hash - base_image_hash >= min_hash_difference_between_key_frames
            )
            if is_key_frame:
                # Later frames are compared against this one, not against
                # their immediate predecessor.
                base_image_hash = image_hash
                print(f"Identified frame {idx} to be a key frame.")
                image.save(out_path / f"{idx:05}.jpg")
            idx += 1
    finally:
        # Always free the capture handle, even if hashing or saving fails.
        cap.release()
def _main() -> None:
    """Hand ``dedup`` to ``fire.Fire`` so it can be driven from the command line."""
    fire.Fire(dedup)


# Only start the command-line interface when executed as a script.
if __name__ == "__main__":
    _main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment