Skip to content

Instantly share code, notes, and snippets.

@vadimkantorov
Last active January 1, 2023 18:31
Show Gist options
  • Save vadimkantorov/0f4a2060f1dc245e6ad0e019b99d59ee to your computer and use it in GitHub Desktop.
Save vadimkantorov/0f4a2060f1dc245e6ad0e019b99d59ee to your computer and use it in GitHub Desktop.
Recursively finds all videos in a directory and produces a webpage with one frame for every 30 minutes of video. A Python primer on gathering basic video information using ffprobe and reading JPEG-encoded frames using ffmpeg.
# Usage: python thumbnail_video_collection.py /path/to/video/dir > thumbnail.html
import os
import sys
import json
import subprocess
import datetime
import argparse
# Command-line interface. The resulting HTML page is written to stdout, so
# redirect it to a file (see the usage line at the top of the script).
parser = argparse.ArgumentParser(description = 'Produce an HTML page with periodic thumbnails for every video found under a directory.')
parser.add_argument('video_dir', help = 'directory that is searched recursively for videos')
parser.add_argument('--skip_seconds', type = int, default = 30 * 60, help = 'seconds between two consecutive thumbnails of a video (default: %(default)s)')
parser.add_argument('--thumb_size', default = '320x240', help = 'thumbnail size passed to ffmpeg as WIDTHxHEIGHT (default: %(default)s)')
# NOTE: with action='append' the values given on the command line are added
# to the default list, so 'mp4' is always part of the accepted suffixes.
parser.add_argument('--video_extension', default = ['mp4'], action = 'append', help = 'additional file-name suffix to treat as a video; may be repeated')
args = parser.parse_args()

# A zero step would make range() raise later on and a negative one would
# silently produce no thumbnails, so reject both up front.
if args.skip_seconds <= 0:
    parser.error('--skip_seconds must be a positive integer')

# str.endswith accepts a tuple of suffixes: each file is tested once, so it
# is listed once even when several suffixes match it (for example when
# 'mp4' is passed explicitly on top of the default).
video_suffixes = tuple(args.video_extension)
video_paths = sorted(
    os.path.join(root, file_name)
    for root, _, file_names in os.walk(args.video_dir)
    for file_name in file_names
    if file_name.endswith(video_suffixes)
)
# Render the HTML page: one table row per video holding its index, frame
# size, duration, the components of its path and one embedded JPEG
# thumbnail every args.skip_seconds seconds.
# The page goes to stdout and progress messages go to stderr. Output is
# written with sys.stdout.write / sys.stderr.write instead of the Python 2
# print statement so the script runs under both Python 2 and Python 3.
import base64
from xml.sax.saxutils import escape as html_escape

sys.stdout.write('<html><body><table border="1px">\n')
for video_index, video_path in enumerate(video_paths):
    sys.stderr.write('%d / %d %s\n' % (video_index + 1, len(video_paths), os.path.basename(video_path)))

    # -show_format is requested in addition to -show_streams because some
    # containers report the duration only at the format level.
    ffprobe = subprocess.check_output(['ffprobe', '-v', 'quiet', '-print_format', 'json', '-show_format', '-show_streams', video_path])
    probe = json.loads(ffprobe.decode('utf-8'))
    video_streams = [stream for stream in probe.get('streams', []) if stream.get('codec_type') == 'video']
    if not video_streams:
        # A file without a video stream should not abort the whole run.
        sys.stderr.write('  skipped: no video stream found\n')
        continue

    stream = video_streams[0]
    frame_width, frame_height = stream['width'], stream['height']
    # Prefer the stream duration, fall back to the container duration and
    # finally to 0 so that at least the first frame is still rendered.
    duration = int(float(stream.get('duration') or probe.get('format', {}).get('duration') or 0))

    # Normalizing the separator is a no-op where os.sep is already '/'.
    path_parts = video_path.replace(os.sep, '/').split('/')
    cells = ['#%d' % (video_index + 1), 'W%d' % frame_width, 'H%d' % frame_height, str(datetime.timedelta(seconds = duration))] + path_parts

    sys.stdout.write('<tr>\n')
    # File names are arbitrary text: escape &, < and > before embedding them.
    sys.stdout.write(''.join('<td style="white-space:nowrap">{0}</td>'.format(html_escape(cell)) for cell in cells) + '\n')

    # The final (partial) interval is left out as before, but max(..., 1)
    # guarantees a thumbnail at 0s for videos shorter than skip_seconds.
    for ss in range(0, max(duration - args.skip_seconds, 1), args.skip_seconds):
        # Original author's note: this explicit iteration with fast seeks
        # (-ss placed before -i) is 10x faster than
        # "-skip_frame nokey -vf fps=1/1800".
        jpeg = subprocess.check_output(['ffmpeg', '-v', 'quiet', '-ss', str(ss), '-i', video_path, '-s', args.thumb_size, '-an', '-f', 'image2pipe', '-c:v', 'mjpeg', '-q:v', '1', '-frames:v', '1', '-'])
        # b64encode yields bytes without line breaks; decode to text for HTML.
        sys.stdout.write('<td><img src="data:image/jpeg;base64,%s" /></td>\n' % base64.b64encode(jpeg).decode('ascii'))
    sys.stdout.write('</tr>\n')
sys.stdout.write('</table></body></html>\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment