Last active
March 23, 2024 02:49
-
-
Save AustinSaintAubin/a50b29ce52de5501a6dd05bf5d24cd44 to your computer and use it in GitHub Desktop.
Whisper ASR Webservice | File Processing Script | 2023/09/23 | v2.8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
echo "Whisper ASR Webservice | File Proccessing Script | 2023/09/23 | v2.8"
echo "Author: Austin St. Aubin w/ a little help from ChatGPT."
echo "License: MIT License"

# Transcribes audio files through a Whisper ASR Webservice instance and writes
# the transcript in one or more formats.
# The audio is read from SOURCE_PATH (a file, or a folder that is searched
# recursively). Transcripts are written next to each source file, or into the
# directory given with --dest-dir, one file per extension listed in
# TRANSCRIPT_EXTENSIONS.
#   https://github.com/ahmetoner/whisper-asr-webservice/issues/93
#   https://gist.github.com/AustinSaintAubin/a50b29ce52de5501a6dd05bf5d24cd44
# - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Example:
#   bash /volume1/docker/whisper-asr-webservice/whisper-asr-webservice_file-processor.sh "/volume1/docker/whisper-asr-webservice/audio" --output "txt,tsv,srt,vtt,json" --gpu --reprocess
# -------------------------------------------------------

# Default Values
WISPER_ASR_SERVER_IP="localhost"  # "192.168.20.27"  # "192.168.10.91"
WISPER_ASR_SERVER_CPU="http://${WISPER_ASR_SERVER_IP}:9005"  # Instance for CPU processing
WISPER_ASR_SERVER_GPU="http://${WISPER_ASR_SERVER_IP}:9006"  # Instance for GPU processing
# WISPER_ASR_SERVER="${WISPER_ASR_SERVER:-WISPER_ASR_SERVER_GPU}"  # "http://localhost:9000"
WISPER_ASR_TASK="transcribe"
WISPER_ASR_LANG="en"
WISPER_ASR_INITIAL_PROMPT=""  # "- Hey how are you doing? - I'm doing good. How are you?"
TRANSCRIPT_EXTENSIONS="txt,tsv,srt"  # "txt,tsv,srt,vtt,json" are the supported extensions for the transcripts.
DESTINATION_OUTPUT_PRINT=false  # true/false | print the contents of the destination file to the terminal
DESTINATION_REPROCESS=false  # true/false | if true, an existing destination file is deleted and regenerated
SOURCE_TEMP_TRANSCODING_KEEP=false  # true/false | if false, the temporary transcoding file is deleted once a source file is done
# User Output Helper
# Prints the command-line usage guide followed by a ready-to-run example.
# Arguments: none
# Outputs:   the usage text on stderr, the example line on stdout
helper_output () {
  # The here-document ends with an empty line so the usage text is followed by
  # a blank line, as the original multi-line echo produced.
  cat >&2 <<EOF
Usage: $(basename "$0") AUDIO-FILE [OPTIONS]
NOTE: Path to the source audio file/folder is the only input required. Can be passed without flag. All other parrameaters/options are optional.
  -i, --input            Path to input audio file. (mp3|mp4|wav|m4a)
  -o, --output           List of transcription output types, listed by extention, comma seperated. (txt,tsv,srt,vtt,json)
  -s, --server           Server Address. (http://localhost:9000)
  -t, --task             Task for the server. (transcribe)
  -l, --lang             Language of source audio file (en)
  -e,--initial-prompt    Intital Prompt for use in adding contect to wisper proccesing.
  -p, --print            Print contents of destination file when done.
  -r, --reprocess        Delete and reproccess selected transcription types.
  -k, --keep-temp        Keep converted tempararty audio files from M4A transcoding.
  -d, --dest-dir         Destination dirrectory, if differs from source.
  -c/-g, --cpu / --gpu   Secifies which Wisper ASR Webservice Server to use... the one for CPU or GPU realted task.
  -h, --help             Shows this guide.

EOF
  echo "Example: bash $(realpath "$0") \"$(pwd)\" --output \"txt,srt\" --print --gpu"
}
# time duration
# Formats a number of seconds both as words and as HH:MM:SS.
# Arguments: $1 - elapsed time in whole seconds (treated as 0 when omitted)
# Outputs:   "Duration: HH hours, MM minutes, SS seconds = HH:MM:SS" on stdout
format_time() {
  local seconds=${1:-0}
  local hours=$(( seconds / 3600 ))
  local remainder_minutes=$(( (seconds % 3600) / 60 ))
  local remainder_seconds=$(( seconds % 60 ))
  printf 'Duration: %02d hours, %02d minutes, %02d seconds' \
    "${hours}" "${remainder_minutes}" "${remainder_seconds}"
  printf ' = %02d:%02d:%02d\n' \
    "${hours}" "${remainder_minutes}" "${remainder_seconds}"
}
# file size
# Prints the human-readable on-disk size of each given path, one per line.
# Arguments: $@ - path(s) to measure
# Outputs:   size such as "4.0K" or "12M" on stdout
format_file_size() {
  # du reports the same allocated size the previous `ls --size` column did,
  # without parsing `ls -l` output and without GNU-only long options.
  # `--` keeps file names that start with a dash from being read as options.
  du -sh -- "$@" | cut -f1
}
# file audio duration
# Prints the duration of a media file as HH:MM:SS, as reported by ffmpeg.
# Arguments: $1 - path to the media file
# Outputs:   the duration on stdout; nothing when ffmpeg reports no duration
format_audio_durration() {
  # -nostdin and </dev/null stop ffmpeg from consuming the caller's stdin;
  # this function is called from inside a loop that reads a file list.
  # ffmpeg prints its probe information on stderr, hence 2>&1.
  # Only the first "Duration:" header line is used, so a metadata tag that
  # merely contains the word cannot produce extra output.
  ffmpeg -nostdin -hide_banner -i "$1" </dev/null 2>&1 \
    | sed -n 's/^[[:space:]]*Duration:[[:space:]]*\([0-9]\{2,\}:[0-9]\{2\}:[0-9]\{2\}\).*/\1/p' \
    | head -n 1
}
# Prints a one-line summary of a finished task.
# Arguments: $1 - seconds the task took
#            $2 - path of the file the task worked on
# Outputs:   task duration, file size and audio duration on stdout
output_task_file_info() {
  local seconds=$1
  local filepath=$2
  # Arguments are quoted so a path containing spaces, commas or glob
  # characters reaches the helpers as one word.
  echo " └─ Task $(format_time "${seconds}") | File Size: $(format_file_size "${filepath}") | Audio Durration: $(format_audio_durration "${filepath}")"
}
# Parse Options
# Parses the command line into the script's global settings.
# Globals written: SOURCE_PATH, TRANSCRIPT_EXTENSIONS, WISPER_ASR_SERVER,
#                  WISPER_ASR_TASK, WISPER_ASR_LANG, WISPER_ASR_INITIAL_PROMPT,
#                  USER_DEFINED_DESTINATION_PATH_DIRECTORY,
#                  DESTINATION_OUTPUT_PRINT, DESTINATION_REPROCESS,
#                  SOURCE_TEMP_TRANSCODING_KEEP,
#                  CLI_POSITIONAL_ARGS (array of the unflagged arguments)
# Globals read:    WISPER_ASR_SERVER_CPU, WISPER_ASR_SERVER_GPU
# Arguments:       the script's command-line arguments
# Exits with 1 on an unknown option or more than one source path, and with 0
# after printing the guide for -h/--help.
parse_cli_arguments() {
  local options

  # getopt reports the offending option itself on stderr; the exit status is
  # tested directly so nothing can overwrite it in between.
  if ! options=$(getopt --name "$(basename "$0")" \
    -o i:o:s:t:l:e:d:prkcgh \
    --long input:,output:,server:,task:,lang:,initial-prompt:,dest-dir:,print,reprocess,keep-temp,cpu,gpu,help \
    -- "$@"); then
    echo "ERROR: UNKNOWN OPTIONS INPUT!"
    helper_output
    exit 1
  fi

  # Parser
  # getopt returns a shell-quoted, normalised argument list terminated by
  # "--"; eval loads it back into the positional parameters.
  eval set -- "$options"
  while true; do
    case "$1" in
      -i | --input)
        SOURCE_PATH="${2}"
        shift 2
        ;;
      -o | --output)
        TRANSCRIPT_EXTENSIONS="${2}"
        shift 2
        ;;
      -s | --server)
        WISPER_ASR_SERVER="${2}"
        shift 2
        ;;
      -t | --task)
        WISPER_ASR_TASK="${2}"
        shift 2
        ;;
      -l | --lang)
        WISPER_ASR_LANG="${2}"
        shift 2
        ;;
      -e | --initial-prompt)
        WISPER_ASR_INITIAL_PROMPT="${2}"
        shift 2
        ;;
      -d | --dest-dir)
        USER_DEFINED_DESTINATION_PATH_DIRECTORY="${2}"
        shift 2
        ;;
      -p | --print)
        DESTINATION_OUTPUT_PRINT=true
        shift
        ;;
      -r | --reprocess)
        DESTINATION_REPROCESS=true
        shift
        ;;
      -k | --keep-temp)
        SOURCE_TEMP_TRANSCODING_KEEP=true
        shift
        ;;
      -c | --cpu)
        WISPER_ASR_SERVER="${WISPER_ASR_SERVER_CPU}"
        shift
        ;;
      -g | --gpu)
        WISPER_ASR_SERVER="${WISPER_ASR_SERVER_GPU}"
        shift
        ;;
      -h | --help)
        helper_output
        exit 0
        ;;
      --)
        shift
        break
        ;;
      *)
        echo "ERROR: UNKNOWN OPTIONS CASE!"
        helper_output
        exit 1
        ;;
    esac
  done

  # Whatever follows "--" is the list of unflagged arguments.
  CLI_POSITIONAL_ARGS=("$@")

  # Handle remaining arguments
  if (( $# == 1 )); then
    if [[ -z "${SOURCE_PATH}" ]]; then
      SOURCE_PATH="${1}"
    else
      echo "ERROR: Too many arguments. Seems like two source file paths specifited."
      helper_output
      exit 1
    fi
  elif (( $# > 1 )); then
    echo "ERROR: Too many unflagged arguments"
    helper_output
    exit 1
  fi
}

parse_cli_arguments "$@"
# Leave only the unflagged arguments in "$@" for the code that follows.
set -- "${CLI_POSITIONAL_ARGS[@]}"
# Error Checking
# A source path is mandatory. It may have been given with -i/--input or as the
# single unflagged argument, so SOURCE_PATH itself is checked; counting the
# positional arguments would reject a valid "--input PATH" call.
if [[ -z "${SOURCE_PATH:-}" ]]; then
  echo "ERROR: No source path provided."
  helper_output
  exit 1
fi

# define the transcript file extension(s)
# Split the comma-separated list into an array; IFS is changed for this one
# read only.
IFS=',' read -ra TRANSCRIPT_EXTENSIONS <<< "${TRANSCRIPT_EXTENSIONS}"  # txt, vtt, srt, tsv, json

# check if any transcript extensions were provided
if [[ ${#TRANSCRIPT_EXTENSIONS[@]} -eq 0 ]]; then
  echo "No transcript extensions were provided."
  exit 1
fi

# check if the source path exists (file or directory)
if [[ ! -e "${SOURCE_PATH}" ]]; then
  echo "Source Path '${SOURCE_PATH}' does not exist"
  exit 1
fi

# set default server if not set
WISPER_ASR_SERVER="${WISPER_ASR_SERVER:-$WISPER_ASR_SERVER_GPU}"
# Format the initial prompt as URL-encoded text so it can be placed in the
# request's query string.
WISPER_ASR_INITIAL_PROMPT_URL_ENCODED="$(jq -rn --arg x "${WISPER_ASR_INITIAL_PROMPT}" '$x|@uri')"

# Fetch the server's OpenAPI document once; both the version and the
# description printed below are read from it.
WISPER_ASR_OPENAPI_JSON="$(curl -s "${WISPER_ASR_SERVER}/openapi.json")"

# Print Header
echo " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"
echo "Whisper ASR Webservice | v$(jq -r '.info.version' <<< "${WISPER_ASR_OPENAPI_JSON}") "  # | ${WISPER_ASR_SERVER} | ${WISPER_ASR_TASK} | ${WISPER_ASR_LANG} | ${WISPER_ASR_INITIAL_PROMPT}"
echo "$(jq -r '.info.description' <<< "${WISPER_ASR_OPENAPI_JSON}")"
echo "Server: ${WISPER_ASR_SERVER}"
echo "Task: ${WISPER_ASR_TASK}"
echo "Lang: ${WISPER_ASR_LANG}"
echo "Initial Prompt: ${WISPER_ASR_INITIAL_PROMPT}"
# echo "Encoded Prompt: ${WISPER_ASR_INITIAL_PROMPT_URL_ENCODED}"
echo " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"
echo "Source Path: ${SOURCE_PATH}"
# Only shown when --dest-dir was given; labelled like the neighbouring lines.
if [[ -v USER_DEFINED_DESTINATION_PATH_DIRECTORY ]]; then
  echo "Destination Path: ${USER_DEFINED_DESTINATION_PATH_DIRECTORY}"
fi
echo "Transcript Extensions: ${TRANSCRIPT_EXTENSIONS[*]}"
echo "Destination Output Print: ${DESTINATION_OUTPUT_PRINT}"
echo "Destination Reprocess: ${DESTINATION_REPROCESS}"
echo "Source Temp Transcoding Keep: ${SOURCE_TEMP_TRANSCODING_KEEP}"
echo "============================================================================"
# Transcribes one source audio file into every requested transcript format.
# Globals read: TRANSCRIPT_EXTENSIONS (array), DESTINATION_REPROCESS,
#               DESTINATION_OUTPUT_PRINT, SOURCE_TEMP_TRANSCODING_KEEP,
#               USER_DEFINED_DESTINATION_PATH_DIRECTORY, WISPER_ASR_SERVER,
#               WISPER_ASR_TASK, WISPER_ASR_LANG,
#               WISPER_ASR_INITIAL_PROMPT_URL_ENCODED
# Arguments:    $1 - path of the source audio file
# Outputs:      progress messages on stdout; transcript files on disk
# Exits the script with status 1 when transcoding or the upload fails.
process_source_audio_file() {
  local source_file=$1
  local source_dir source_basename source_stem
  local dest_dir dest_dir_abs temp_basename temp_path
  local extension dest_path asr_source form_path

  # Path Variables
  # The directory is resolved to an absolute path because docker bind mounts
  # require one; this also covers a source given as a bare file name.
  source_dir=$(CDPATH='' cd -- "$(dirname -- "${source_file}")" && pwd) || {
    echo "Failed to resolve the directory of: ${source_file}"
    exit 1
  }
  source_basename=$(basename -- "${source_file}")
  source_stem=${source_basename%.*}
  dest_dir=${USER_DEFINED_DESTINATION_PATH_DIRECTORY:-${source_dir}}
  if [[ -d "${dest_dir}" ]] && dest_dir_abs=$(CDPATH='' cd -- "${dest_dir}" && pwd); then
    dest_dir=${dest_dir_abs}
  fi
  temp_basename="${source_stem}.temp.mp3"
  temp_path="${dest_dir}/${temp_basename}"

  # Loop through each transcript extension
  for extension in "${TRANSCRIPT_EXTENSIONS[@]}"; do
    # Remove any white space from the extension
    extension=${extension//[[:space:]]/}

    # Set the destination path for the current transcript extension
    dest_path="${dest_dir}/${source_stem}.${extension}"

    # If destination reprocessing is true and the destination file already exists, delete it
    if [[ "${DESTINATION_REPROCESS}" == true && -e "${dest_path}" ]]; then
      echo "Deleting old destination file: ${dest_path}"
      rm -- "${dest_path}"
    fi

    # then, check if a transcript file already exists for this audio file
    if [[ -f "${dest_path}" ]]; then
      echo "Transcript already exists: ${source_file} | ${extension}"
    else
      # Transcode m4a to mp3 so it is usable for transcribing, unless a
      # non-empty transcoded file is already present.
      if [[ "${source_file}" == *.m4a ]] && [[ ! -f "${temp_path}" || ! -s "${temp_path}" ]]; then
        echo "Transcoding to MP3: ${source_file} -> ${temp_basename} | ${extension}"
        # convert the file to mp3 using the FFmpeg container
        SECONDS=0
        if docker run --rm \
          --volume "${source_dir}:${source_dir}" \
          --volume "${dest_dir}:${dest_dir}" \
          --workdir "${source_dir}" jrottenberg/ffmpeg \
          -i "${source_basename}" \
          -loglevel fatal -hide_banner -stats \
          -acodec libmp3lame -ac 1 -ar 16000 -ab 192k -y "${dest_dir}/${temp_basename}"; then
          output_task_file_info "${SECONDS}" "${source_file}"
        else
          echo "Failed to convert file to MP3."
          if [[ -e "${temp_path}" ]]; then
            rm -- "${temp_path}"
          fi
          exit 1
        fi
      fi

      # set asr source path to source path of raw file, or transcoded file
      if [[ "${source_file}" == *.m4a && -f "${temp_path}" ]]; then
        asr_source=${temp_path}
      else
        asr_source=${source_file}
      fi

      # Print the source and destination paths along with the transcript extension
      echo "Sending to ${WISPER_ASR_TASK^}: ${asr_source} -> $(basename -- "${dest_path}") [$(format_file_size "${asr_source}")|$(format_audio_durration "${asr_source}")]"

      # curl's --form treats ';' and ',' in a file name as field separators
      # unless the name is double-quoted, with '\' and '"' backslash-escaped.
      form_path=${asr_source//\\/\\\\}
      form_path=${form_path//\"/\\\"}

      # Send the audio file to the web service for transcription.
      # --fail makes an HTTP error count as a failure instead of saving the
      # server's error page as the transcript.
      SECONDS=0
      if ! curl --fail --progress-bar --request 'POST' \
        "${WISPER_ASR_SERVER}/asr?task=${WISPER_ASR_TASK}&language=${WISPER_ASR_LANG}&initial_prompt=${WISPER_ASR_INITIAL_PROMPT_URL_ENCODED}&output=${extension}" \
        --header 'accept: application/json' \
        --header 'Content-Type: multipart/form-data' \
        --form "audio_file=@\"${form_path}\";type=audio/mpeg" \
        --output "${dest_path}"; then
        echo "Failed to transcribe file: ${dest_path}"
        # Drop a partial file so the next run does not report it as an
        # existing transcript.
        rm -f -- "${dest_path}"
        exit 1
      elif [[ -f "${dest_path}" ]]; then
        echo "Successful Transcription: ${dest_path}"
        output_task_file_info "${SECONDS}" "${source_file}"
      else
        echo "Unknown Transcription Failure!"
      fi

      # print output from destination file.
      if [[ "${DESTINATION_OUTPUT_PRINT}" == true ]]; then
        # Print a separator
        echo " - - - - - - - - - - - - - - - - - - -"
        # Print the contents of the destination file
        if [[ -f "${dest_path}" ]]; then
          cat -- "${dest_path}"
        fi
      fi
    fi

    # Print a separator
    if [[ ${#TRANSCRIPT_EXTENSIONS[@]} -gt 1 ]]; then
      echo " - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -"
    fi
  done

  # If source temporary transcoding file exist, delete it.
  if [[ -e "${temp_path}" && "${SOURCE_TEMP_TRANSCODING_KEEP}" != true ]]; then
    echo "Deleting Temp Transconding Audio File: ${temp_path}"
    rm -- "${temp_path}"
  fi

  # Print a separator
  echo "----------------------------------------------------------------------------"
}

# find all audio files in the source path folder and its subdirectories
# - Names are NUL-delimited so any file name is read back intact.
# - The list arrives on file descriptor 3, leaving stdin alone for the
#   commands run per file.
# - "*.temp.mp3" files are this script's own transcoding output and are not
#   treated as sources.
while IFS= read -r -d '' -u 3 SOURCE_PATH_FILE; do
  process_source_audio_file "${SOURCE_PATH_FILE}"
done 3< <(find "${SOURCE_PATH}" -type f \
  \( -name "*.mp3" -o -name "*.mp4" -o -name "*.wav" -o -name "*.m4a" \) \
  ! -name "*.temp.mp3" -print0)
hello, can you help me with setup. i dont know how to work with these, but know that is what i need.
Ya, what kind of help do you need? What are you trying to do specifically?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
hello,
can you help me with setup. i dont know how to work with these, but know that is what i need.