Last active
November 3, 2020 18:45
-
-
Save junjuew/c9e5a63d5f1529dc83e94deade3a6ef5 to your computer and use it in GitHub Desktop.
Deduplicate Images Based on Their Perceptual Hash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pathlib | |
import cv2 as cv | |
import fire | |
import imagehash | |
from PIL import Image | |
def dedup(video_uri: str, output_dir: str, min_hash_difference_between_key_frames: float=10):
    """Extract visually distinct key frames from a video and save them as JPEGs.

    Frames are read sequentially. The first frame is always kept; each later
    frame is kept only when its perceptual hash differs from the hash of the
    most recently kept frame by at least the given threshold. Kept frames are
    written to ``output_dir`` as ``<frame index, zero-padded to 5>.jpg``.

    Args:
        video_uri: Path or URI handed to ``cv.VideoCapture``.
        output_dir: Directory that receives the key-frame images. It is
            created (including missing parents) when it does not exist.
        min_hash_difference_between_key_frames: Minimum perceptual-hash
            difference from the last kept frame for a frame to count as a
            new key frame.

    Raises:
        ValueError: If the video source cannot be opened.
    """
    cap = cv.VideoCapture(video_uri)
    if not cap.isOpened():
        raise ValueError(f"Cannot open {video_uri}")

    try:
        output_dir = pathlib.Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Hash of the most recently saved key frame; None until the first
        # frame has been seen, which forces that frame to be saved.
        base_image_hash = None
        idx = 0  # index of the current frame within the video
        while True:
            valid, frame = cap.read()
            if not valid:
                # End of stream (or a read failure): stop processing.
                break

            # OpenCV yields BGR arrays; PIL expects RGB channel order.
            image = Image.fromarray(cv.cvtColor(frame, cv.COLOR_BGR2RGB))
            image_hash = imagehash.phash(image)

            if base_image_hash is None or (
                    image_hash - base_image_hash
                    >= min_hash_difference_between_key_frames):
                # Compare against the last *kept* frame, not the previous
                # frame, so slow gradual drift still produces new key frames.
                base_image_hash = image_hash
                print(f"Identified frame {idx} to be a key frame.")
                image.save(output_dir / f"{idx:05}.jpg")
            idx += 1
    finally:
        # Always free the capture handle, even if hashing or saving fails.
        cap.release()
# Script entry point: expose `dedup` as a command-line interface through
# Fire when this file is executed directly rather than imported.
if __name__ == "__main__":
    fire.Fire(component=dedup)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment