Last active
February 3, 2022 01:21
-
-
Save vvzen/692a9fb9539a693baff01c0c3709fde4 to your computer and use it in GitHub Desktop.
WIP: Automatic scene detection of Editorial Reference movies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# WIP: automatic scene detection of editorial reference movies.
#
# Usage: pass the path of the input movie as the first argument.
# Frames are extracted at scene changes and each frame is run through OCR.

# Path of the movie to analyse (empty when no argument was given).
input_video=${1:-}

# Parameters

# Threshold compared against ffmpeg's per-frame "scene" score; frames scoring
# above it are written out.
scene_detection_score="0.3"

# Tesseract executable. The default is the original development path; set
# TESSERACT_BIN in the environment to point to a different binary.
tesseract="${TESSERACT_BIN:-/Users/vvzen/Desktop/framestore/experimental-development/automatic-TO-recognition/ocr/tesseract/tesseract}"
# My testing has been done with
# tesseract 5.0.1-9-g31a968
# leptonica-1.81.1
# libgif 5.2.1 : libjpeg 9d : libpng 1.6.37 : libtiff 4.3.0 : zlib 1.2.11 : libwebp 1.2.1 : libopenjp2 2.4.0
# Found AVX2
# Found AVX
# Found FMA
# Found SSE4.1
# Found libcurl/7.54.0 LibreSSL/2.6.5 zlib/1.2.11 nghttp2/1.24.1

# Directory (relative to the current working directory) receiving the frames.
output_dir_name="output-frames-4"
# Abort early unless the input movie was given and exists as a regular file.
if [[ -z "$input_video" ]]; then
  # No argument at all: show how to call the script instead of an empty path.
  echo "Usage: $0 <input_video>" >&2
  exit 1
fi

if [[ -f "$input_video" ]]; then
  echo "Reading $input_video"
else
  # Diagnostics go to stderr so they do not mix with regular output.
  echo "$input_video was not found on disk." >&2
  echo "Please specify a valid file!" >&2
  exit 1
fi
# Create the output directory and write one JPEG per detected scene change.
mkdir -p -- "./$output_dir_name" || exit 1

# select=gt(scene,N) keeps only frames whose scene score exceeds the threshold;
# the comma is escaped so ffmpeg does not treat it as a filter separator.
# -vsync vfr drops the non-selected frames instead of duplicating kept ones.
# Paths are quoted so movies or directories containing spaces work.
ffmpeg -i "$input_video" \
  -filter:v "select=gt(scene\,$scene_detection_score)" \
  -vsync vfr \
  "$output_dir_name/frame%d.jpg" \
  || exit 1
# Pre-process and OCR every extracted frame, working inside the output
# directory. Bail out if the directory cannot be entered: the loop below
# overwrites images in place and must not run anywhere else.
pushd "$output_dir_name" || exit 1

# Collect the frames with a glob rather than word-splitting find's output, so
# the counted set and the processed set are the same and odd filenames are
# safe. nullglob makes the list empty when nothing was extracted.
shopt -s nullglob
frames=(frame*.jpg)
shopt -u nullglob
num_files=${#frames[@]}

current_file_index=0
for filename in "${frames[@]}"; do
  # Count from 1 so the progress line reads "1 / N" ... "N / N".
  current_file_index=$((current_file_index + 1))
  echo "Processing frame $current_file_index / $num_files"

  # Pre-processing steps via imagemagick (the frame is overwritten in place)
  # 1. Convert to grayscale to facilitate thresholding
  # 2. Add a black border around the image to help OCR
  if ! convert "./$filename" -colorspace Gray -bordercolor Black -border 10x10 "./$filename"; then
    echo "convert failed for $filename, skipping OCR" >&2
    continue
  fi

  # OCR via tesseract; "./$filename.content" is the output base name.
  "$tesseract" "./$filename" "./$filename.content" \
    --psm 11 \
    -c tessedit_write_images=1 \
    -c load_system_dawg=0 \
    -c load_freq_dawg=0 \
    -c thresholding_score_fraction=0.2 \
    || echo "tesseract failed for $filename" >&2
done

popd || exit 1
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment