Last active
May 18, 2023 17:02
-
-
Save mvasin/2e0b99942f4ee2e718eb2471ff756c20 to your computer and use it in GitHub Desktop.
Transcribing audio files to text using OpenAI's Whisper model
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# A Mac utility to transcribe audio files using OpenAI's API (Whisper)
#
# Usage:
#
#   OPENAI_API_KEY='your-key-here' bash ./transcribe.sh [/path/to/the/dir/with/the/audio/files]
#
# If the path to the files is not specified, the utility will work in the current directory
#
# Requires: jq, ffmpeg (both checked below) and the OPENAI_API_KEY environment variable.

# The key is sent with every transcription request, so stop right away
# (with a failing status) when it is missing instead of only printing a note.
if [ -z "${OPENAI_API_KEY:-}" ]; then
  echo "OPENAI_API_KEY environment variable is not set" >&2
  exit 1
fi

MAX_SIZE=26214400 # size in bytes equivalent to 25MB
SEGMENT=1200      # if the file is too big, it will be split into 20 min segments

# Both helper tools are mandatory; a missing one is an error, so exit non-zero.
if ! command -v jq >/dev/null 2>&1; then
  echo "the 'jq' utility could not be found, install it with 'brew install jq'" >&2
  exit 1
fi

if ! command -v ffmpeg >/dev/null 2>&1; then
  echo "the 'ffmpeg' utility could not be found, install it with 'brew install ffmpeg'" >&2
  exit 1
fi

WORK_DIR=${1:-$PWD}
INITIAL_PWD=$PWD

# Everything below works on relative paths, so a failed cd must abort;
# otherwise the files of the current directory would be processed instead.
if ! cd "$WORK_DIR"; then
  echo "cannot change to directory '$WORK_DIR'" >&2
  exit 1
fi
# If there are any mp4 files in the working directory, their audio track is
# extracted (stream copy, no re-encoding) into an m4a file with the same name.
for file in *.mp4; do
  # When nothing matches, the glob stays as the literal '*.mp4'; skip it.
  [ -e "$file" ] || continue

  audio_file="${file%.mp4}.m4a"

  # An existing audio file means the extraction was already done.
  [ -e "$audio_file" ] && continue

  # ffmpeg only prints errors (-loglevel error); let them through and do not
  # keep a partial output file, which would otherwise be picked up as a
  # regular audio file and would also block a retry on the next run.
  if ! ffmpeg -hide_banner -loglevel error -i "$file" -vn -acodec copy "$audio_file"; then
    echo "Failed to extract audio from '$file'" >&2
    rm -f -- "$audio_file"
  fi
done
#######################################
# Upload one audio file to OpenAI's transcription endpoint and save the
# returned text next to it.
# Globals:
#   file_to_transcribe (read) - path of the audio file to upload
#   OPENAI_API_KEY     (read) - sent as the bearer token
# Outputs:
#   Progress messages on stdout. On failure, a message naming the file and
#   the raw response on stderr. On success the transcript is written to the
#   audio file's path with its extension replaced by '.txt'.
# Returns:
#   0 on success. On failure it EXITS the current shell with status 1
#   (callers run it as a background job, i.e. in a subshell).
#######################################
transcribe() {
  local response output

  echo "Transcribing '$file_to_transcribe'..."
  response=$(curl -s https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -H "Content-Type: multipart/form-data" \
    -F "file=@${file_to_transcribe}" \
    -F "language=en" \
    -F "model=whisper-1")

  # Ask jq whether the response is an object with a string "text" field.
  # Grepping for the substring "text" also matches error payloads (for
  # example a message containing the word "context"). printf is used
  # instead of echo because echo may expand the backslash escapes inside
  # the JSON in some shells, corrupting it before jq sees it.
  if printf '%s' "$response" | jq -e '(.text | type) == "string"' >/dev/null 2>&1; then
    output="${file_to_transcribe%.*}.txt"
    printf '%s' "$response" | jq -j .text > "$output"
    echo "'$file_to_transcribe' has been transcribed successfully ✅"
  else
    echo "Failed to transcribe '$file_to_transcribe' ❌" >&2
    printf '%s\n' "$response" >&2
    exit 1
  fi
}
# Transcribe one segment and delete it afterwards.
# Reads the globals file_to_transcribe (via transcribe) and split_file.
# transcribe exits the background subshell when it fails, so the segment of
# a failed transcription is not removed.
transcribe_and_rm() {
  transcribe
  rm -- "$split_file" # remove the split file after transcribing
}

pids=""  # PIDs of the background transcription jobs
status=0 # becomes 1 as soon as any split or transcription fails

# Transcribe every m4a/mp3 file of the working directory, one background job
# per upload. Files above MAX_SIZE are first cut into SEGMENT-second pieces.
for file in *; do
  case "$file" in
    *.m4a | *.mp3) ;;
    *) continue ;;
  esac
  [ -f "$file" ] || continue

  # wc -c is POSIX, unlike 'stat -f%z' which only exists in BSD stat; the
  # arithmetic expansion strips the padding some wc implementations print.
  FILE_SIZE=$(($(wc -c < "$file")))

  if ((FILE_SIZE > MAX_SIZE)); then
    echo "File '$file' is too big ($FILE_SIZE bytes). Splitting into smaller segments 🪚..."
    if ! ffmpeg -hide_banner -loglevel error -i "$file" -f segment -segment_time "$SEGMENT" -c copy "${file%.*}-split-%03d.m4a"; then
      echo "Failed to split '$file'" >&2
      # Do not leave partial segments behind: a later run would treat them
      # as ordinary audio files.
      rm -f -- "${file%.*}-split-"*.m4a
      status=1
      continue
    fi
    for split_file in "${file%.*}-split-"*.m4a; do
      # An unmatched glob stays as the literal pattern; skip it.
      [ -e "$split_file" ] || continue
      file_to_transcribe=$split_file
      # The job is forked here, so it keeps the current values of
      # split_file and file_to_transcribe.
      transcribe_and_rm &
      pids="$pids $!"
    done
  else
    file_to_transcribe="$file"
    transcribe &
    pids="$pids $!"
  fi
done

# Wait for each job individually: a bare 'wait' discards the jobs' exit
# statuses, which would make the script succeed even if every upload failed.
for pid in $pids; do # intentionally unquoted: space-separated PID list
  wait "$pid" || status=1
done

cd "$INITIAL_PWD" || status=1
exit "$status"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment