Last active
May 23, 2023 21:48
-
-
Save maledorak/1f84681434e427188685d07c1816a287 to your computer and use it in GitHub Desktop.
Microphone voice transcription and translation with OpenAI Whisper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# ==================================== | |
# author: https://github.com/maledorak | |
# email: [email protected] | |
# version: 0.1 | |
# description: Script for recording voice from microphone and sending to Whisper OpenAI | |
# | |
# how to use: | |
# 1. run it - this will start recording sound: ./whisper.sh -c transcribe -l 30 | |
# 2. stop recording by pressing Ctrl+C or rerun it again with the same arguments to 'shorten' the recording | |
# | |
# tips: | |
# The best way to use it is to bind it to a key combination in your window manager | |
# and then just press it to start recording and stop recording with the same key combination | |
# ==================================== | |
# Fail the script | |
function fail { exit 1; } | |
# Supported commands | |
TRANSCRIBE='transcribe' | |
TRANSLATE='translate' | |
COMMANDS_ARRAY=($TRANSCRIBE $TRANSLATE) | |
COMMANDS_PIPED=$(IFS="|" ; echo "${COMMANDS_ARRAY[*]}") | |
function show_help_and_exit | |
{ | |
printf "\nUsage: %s -c [${COMMANDS_PIPED}] -s [30] \n\n" "$(basename "$0")" | |
printf "Script for recording voice from microphone and sending to Whisper OpenAI \n\n" | |
printf "Options:\n" | |
printf " %-20s - %s\n" "-h|--help" "Show this help message" | |
printf " %-20s - %s\n" "-c" "Whisper command to run [${COMMANDS_PIPED}}]" | |
printf " %-20s - %s\n" "-l" "Record length in seconds (default: 30)" | |
printf " %-20s - %s\n" "-k" "Kill all whisper processes" | |
printf "\n" | |
fail | |
} | |
#====== Check if following commands are installed ==== | |
# Check if ffmpeg is installed | |
if ! command -v ffmpeg &> /dev/null | |
then | |
echo "ffmpeg could not be found" | |
fail | |
fi | |
# Check if jq is installed | |
if ! command -v jq &> /dev/null | |
then | |
echo "jq could not be found" | |
fail | |
fi | |
# Check if xclip is installed | |
if ! command -v xclip &> /dev/null | |
then | |
echo "xclip could not be found" | |
fail | |
fi | |
# Check if openai is installed | |
if ! command -v openai &> /dev/null | |
then | |
echo "openai could not be found" | |
fail | |
fi | |
# Check if notify-send is installed | |
if ! command -v notify-send &> /dev/null | |
then | |
echo "notify-send could not be found" | |
fail | |
fi | |
#======================================= | |
# ---------------------------------------------------------------------------------------------------------------------- | |
COMMAND= | |
LENGTH=30 | |
# Parse arguments | |
while getopts "hkc:l:" opt; do | |
case "${opt}" in | |
h) | |
show_help_and_exit | |
;; | |
\?) | |
error "Unknown option: $1" | |
show_help_and_exit | |
;; | |
c) | |
COMMAND=${OPTARG} | |
;; | |
l) | |
LENGTH=${OPTARG} | |
;; | |
k) | |
notify-send "Whisper" "Killing all processes" & | |
pkill -f whisper | |
exit 0 | |
;; | |
esac | |
done | |
# Validate command argument | |
if [[ -z $COMMAND ]]; then | |
show_help_and_exit | |
fi | |
# Validate command argument, only transcribe or translate are supported | |
if [[ ! "$COMMAND" =~ ^($COMMANDS_PIPED)$ ]]; then | |
echo "You can use only [$COMMANDS_PIPED] as a command [-c] argument, but '${COMMAND}' was used!" | |
show_help_and_exit | |
fi | |
# Validate length argument | |
if [[ ! $LENGTH =~ ^[[:digit:]-]+$ ]]; then | |
echo "Length [-l] argument must be a number, but '${LENGTH}' was used!" | |
fail | |
fi | |
# Validate length argument | |
if [[ -z $OPENAI_API_KEY ]]; then | |
echo "OPENAI_API_KEY environment variable is not set!" | |
echo "Use the following link to set it up: https://platform.openai.com/account/api-keys" | |
fail | |
fi | |
function openai_request { | |
local cmd=$1 | |
local file_path=$2 | |
declare -A cmd_map=( ["$TRANSCRIBE"]="transcriptions" ["$TRANSLATE"]="translations" ) | |
local url_path=${cmd_map[$cmd]} | |
# https://platform.openai.com/docs/api-reference/audio/create | |
local response=$(curl https://api.openai.com/v1/audio/$url_path \ | |
-H "Authorization: Bearer $OPENAI_API_KEY" \ | |
-H "Content-Type: multipart/form-data" \ | |
-F file="@$file_path" \ | |
-F model="whisper-1") | |
local text=$(echo $response | jq -r '.text') | |
if [ "$text" == "null" ]; then | |
echo "Whisper API error $response" | |
return 1 | |
else | |
echo $text | |
return 0 | |
fi | |
} | |
function ffmpeg_record { | |
local file_path=$1 | |
ffmpeg \ | |
-f alsa \ | |
-channels 2 \ | |
-sample_rate 44100 \ | |
-i hw:0 \ | |
-t $LENGTH \ | |
$file_path | |
local code=$? | |
if [ $code -eq 255 ]; then | |
# ffmpeg returns 255 when Ctrl+C is pressed or SIGINT (-2) is received | |
echo "FFmpeg has stopped with SIGINT" | |
return 0 | |
elif [ $code -ne 0 ]; then | |
echo "FFmpeg error $code" | |
return $code | |
else | |
echo "FFmpeg has stopped" | |
return 0 | |
fi | |
} | |
FILE_PREFIX=whisper_$COMMAND | |
IS_OTHER_INSTANCE_RUNNING=$(pgrep -fa $FILE_PREFIX | awk '{print $NF}') | |
if [ -z "$IS_OTHER_INSTANCE_RUNNING" ]; then | |
echo "No process, FFmpeg recording" | |
notify-send "Whisper" "$COMMAND is listening..." & | |
FFMPEG_OUT="/tmp/${FILE_PREFIX}_$(date '+%Y%m%d%H%M%S').mp3" | |
ffmpeg_record $FFMPEG_OUT | |
ffmpeg_record_code=$? | |
if [ $ffmpeg_record_code -eq 0 ]; then | |
echo "FFmpeg has stopped, sending request to OpenAI" | |
declare -A cmd_map=( ["$TRANSCRIBE"]="Transcribing" ["$TRANSLATE"]="Translating" ) | |
notify_string=${cmd_map[$COMMAND]} | |
notify-send "Whisper" "$notify_string..." & | |
text=$(openai_request $COMMAND $FFMPEG_OUT) | |
openai_code=$? | |
if [ $openai_code -ne 0 ]; then | |
notify-send "Whisper" "OpenAI error..." & | |
rm -rf $FFMPEG_OUT | |
exit $openai_code | |
else | |
echo "OpenAI response: $text" | |
echo -n $text | xclip -selection clipboard | |
notify-send "Whisper" "'$text' copied to clipboard." & | |
echo "Clean up file $FFMPEG_OUT" | |
rm -rf $FFMPEG_OUT | |
exit 0 | |
fi | |
else | |
echo "FFmpeg error $ffmpeg_record_code" | |
notify-send "Whisper" "FFmpeg error $ffmpeg_record_code" & | |
exit $ffmpeg_record_code | |
fi | |
else | |
echo "Process found, killing and trimming audio" | |
pid=$(pgrep -fa $FILE_PREFIX | awk '{print $1}') | |
kill -SIGINT $pid | |
while $(kill -0 $pid 2>/dev/null); do | |
echo 'Waiting for ffmpeg to finish...' | |
sleep 0.01 | |
done | |
fi |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment