-
-
Save sniper7kills/6775dacd2b2b0892ec4932975ee04aa3 to your computer and use it in GitHub Desktop.
Freepbx Voicemail Transcription Script: AWS Transcribe API
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# sendmail-aws | |
# | |
# Installation instructions | |
# Copy the content of this file to /usr/sbin/sendmail-aws | |
# Set the S3_BUCKET variable near the top of the script to your S3 bucket name
# | |
# AWS | |
# --------------- | |
# Create an AWS account if you don't have one yet. | |
# | |
# Create an S3 bucket to store the VM's and transcriptions | |
# | |
# Create a role for your AWS instance granting permissions to your S3 bucket, and to AWS transcribe | |
# Attach that role to your instance so the AWS cli doesn't need to have credentials assigned to it within the instance | |
# | |
# From the Linux command line on the FreePBX machine | |
# ------------------------------------------- | |
# | |
# Verify that each of the following is installed (by simply running the command); install any that are missing with yum:
# aws
# jq
# sox
# flac
# lame
# dos2unix -V
# | |
# Follow the instructions to install aws cli https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html | |
# | |
# Run the following commands on FreePBX; | |
# | |
# curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" | |
# unzip -qq awscliv2.zip | |
# sudo ./aws/install | |
# sudo yum install jq | |
# cd /usr/sbin/ | |
# chown asterisk:asterisk sendmail-aws | |
# chmod 744 sendmail-aws | |
# | |
# | |
# Open FreePBX web interface | |
# Go to Settings > Voicemail Admin > Settings > Email Config | |
# Change Mail Command to: /usr/sbin/sendmail-aws | |
# Submit and apply changes | |
# | |
# Original source created by N. Bernaerts: https://github.com/NicolasBernaerts/debian-scripts/tree/master/asterisk | |
# modified per: https://jrklein.com/2015/08/17/asterisk-voicemail-transcription-via-ibm-bluemix-speech-to-text-api/ | |
# modified per: https://gist.github.com/lgaetz/2cd9c54fb1714e0d509f5f8215b3f5e6 | |
# modified per: https://gist.github.com/tony722/7c6d86be2e74fa10a1f344a4c2b093ea | |
# current version: https://gist.github.com/sniper7kills/6775dacd2b2b0892ec4932975ee04aa3 | |
# | |
# Notes: This is a script modified from the original to work with FreePBX so that email notifications sent from | |
# Asterisk voicemail contain a speech to text transcription provided by AWS Transcribe API | |
# | |
# License: There are no explicit license terms on the original script or on the blog post with modifications | |
# I'm assuming GNU/GPL2+ unless notified otherwise by copyright holder(s)
# | |
# Version History: | |
# 2023-01-30 Modified for use with AWS Transcribe instead of Google
# 2021-05-06 Add fix by dcat127: trim flac file to 59 seconds | |
# 2020-08-27 Add fix by chrisduncansn | |
# Minor edit in instruction wording | |
# 2020-05-27 Add instructions from sr10952 | |
# Add export fix by levishores | |
# 2019-02-27 Initial commit by tony722 | |
# ---------------------------------------------------------------------------
# Main script.
#
# Reads one e-mail from stdin. When the mail is a multipart message with a
# WAV attachment, the attachment is converted to MP3, the MP3 is transcribed
# with AWS Transcribe and the transcript is appended to the text body.  The
# resulting mail is handed to "sendmail -t".  Whenever a step fails, the mail
# is still delivered (unmodified, or with a note instead of a transcript), so
# a notification is never lost because of a transcription problem.
#
# Exit status: the exit status of "sendmail -t".
# ---------------------------------------------------------------------------

# set PATH
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
S3_BUCKET="freepbx-transcriptions"
# seconds to sleep between two status checks of the transcription job
POLL_INTERVAL=5
# stop waiting after this many status checks (120 x 5 s = 10 minutes)
MAX_POLLS=120
# name of the transcription job and of the S3 object; stays empty until an
# upload is attempted, so cleanup knows whether anything remote may exist
JOB_NAME=""

# remember the current directory (pushd/popd are not available in plain sh)
ORIG_DIR=$(pwd)

# create a temporary directory and cd to it; if that is impossible, forward
# the untouched mail (sendmail reads the stdin this script was given)
if ! WORK_DIR=$(mktemp -d) || ! cd "$WORK_DIR"; then
  echo "sendmail-aws: cannot create a working directory, forwarding mail unmodified" >&2
  exec sendmail -t
fi

# Go back to the original directory, remove all temporary files and remove
# whatever was created on AWS for this voicemail.
cleanup() {
  cd "$ORIG_DIR" 2>/dev/null || cd /
  rm -Rf -- "$WORK_DIR"
  if [ -n "$JOB_NAME" ]; then
    aws s3 rm "s3://$S3_BUCKET/$JOB_NAME"
    aws s3 rm "s3://$S3_BUCKET/$JOB_NAME.json"
    aws transcribe delete-transcription-job --transcription-job-name "$JOB_NAME"
  fi
}
trap cleanup EXIT
# make sure the EXIT trap also runs when the script is interrupted
trap 'exit 1' HUP INT TERM

# Cut stream.org into parts at every line containing the boundary:
#   stream.part  - header before the boundary
#   stream.part1 - header after the boundary
#   stream.part2 - body of the message
#   stream.part3 - attachment in base64 (WAV file)
#   stream.part4 - footer of the message
# Succeeds only when parts 1 to 4 all exist.
split_mail() {
  # index() compares literally: a MIME boundary may contain characters such
  # as + . ? ( ) that would be misread as regular expression operators
  awk -v b="$BOUNDARY" 'index($0, b) { i++ } { print > ("stream.part" i) }' stream.org
  [ -f stream.part1 ] && [ -f stream.part2 ] && [ -f stream.part3 ] && [ -f stream.part4 ]
}

# Turn the WAV attachment into an MP3 mail part:
#   stream.part3.wav.head   - header of the attachment (first 6 lines)
#   stream.part3.wav.base64 - WAV file of the attachment (encoded base64)
#   stream.part3.mp3        - converted audio, also used for the transcription
#   stream.part3.new        - replacement mail part (header + MP3 in base64)
# Fails when no MP3 could be produced.
convert_attachment() {
  sed '7,$d' stream.part3 > stream.part3.wav.head
  sed '1,6d' stream.part3 > stream.part3.wav.base64
  # convert the base64 file to a wav file
  dos2unix -o stream.part3.wav.base64
  base64 -di stream.part3.wav.base64 > stream.part3.wav
  # convert to MP3
  sox stream.part3.wav stream.part3-pcm.wav
  lame -m m -b 24 stream.part3-pcm.wav stream.part3.mp3
  [ -s stream.part3.mp3 ] || return 1
  base64 stream.part3.mp3 > stream.part3.mp3.base64
  # create mp3 mail part; the dot is escaped so only a real ".wav" file
  # extension is renamed
  sed 's/x-[wW][aA][vV]/mpeg/g' stream.part3.wav.head | sed 's/\.[wW][aA][vV]/.mp3/g' > stream.part3.new
  dos2unix -o stream.part3.new
  unix2dos -o stream.part3.mp3.base64
  cat stream.part3.mp3.base64 >> stream.part3.new
}

# Upload stream.part3.mp3 to the S3 bucket, run an AWS Transcribe job on it
# and store the transcript text in FILTERED.  Fails when the upload fails,
# the job cannot be started, the job ends as FAILED, the job is not
# COMPLETED after MAX_POLLS status checks, or the result cannot be read.
transcribe_voicemail() {
  FILTERED=""
  # date and time plus the process id, so that two voicemails handled within
  # the same second do not share a job name
  JOB_NAME="$(date +%Y-%m-%d_%H-%M-%S)_$$"
  # Upload to the S3 Bucket
  aws s3 cp stream.part3.mp3 "s3://$S3_BUCKET/$JOB_NAME" || return 1
  # Start the transcription job
  aws transcribe start-transcription-job \
    --transcription-job-name "$JOB_NAME" \
    --language-code en-US \
    --media-format mp3 \
    --media "MediaFileUri=s3://$S3_BUCKET/$JOB_NAME" \
    --output-bucket-name "$S3_BUCKET" > /dev/null || return 1
  # Wait for the transcription to finish
  JOB_STATUS=""
  polls=0
  while [ "$polls" -lt "$MAX_POLLS" ]; do
    JOB_STATUS=$(aws transcribe get-transcription-job \
      --transcription-job-name "$JOB_NAME" \
      --query 'TranscriptionJob.TranscriptionJobStatus' \
      --output text)
    case "$JOB_STATUS" in
      COMPLETED) break ;;
      FAILED) return 1 ;;
    esac
    polls=$((polls + 1))
    sleep "$POLL_INTERVAL"
  done
  [ "$JOB_STATUS" = "COMPLETED" ] || return 1
  # Get the transcription result
  aws s3 cp "s3://$S3_BUCKET/$JOB_NAME.json" . || return 1
  FILTERED=$(jq -r '.results.transcripts[].transcript' "$JOB_NAME.json") || return 1
}

# Build stream.new: original headers and body, the text in TRANSCRIPT, the
# MP3 attachment and the original footer.
assemble_mail() {
  {
    # first part of the mail; stream.part is missing when the boundary
    # declaration is on the very first line of the mail
    if [ -f stream.part ]; then
      cat stream.part
    fi
    cat stream.part1
    # body of the message without its last line
    sed '$d' stream.part2
    # transcription section
    echo ""
    echo "--- AWS Transcription Result ---"
    echo ""
    printf '%s\n' "$TRANSCRIPT"
    # end of message body (last line of the original body part)
    tail -n 1 stream.part2
    # add converted attachment
    cat stream.part3.new
  } > stream.new
  # append end of mail body, converting it to LF only
  {
    echo ""
    echo ""
    cat stream.part4
  } > stream.tmp
  dos2unix -o stream.tmp
  cat stream.tmp >> stream.new
}

# dump the stream to a temporary file
cat > stream.org

# get the boundary (first declaration only)
BOUNDARY=$(grep "boundary=" stream.org | head -n 1 | cut -d'"' -f 2)

# if the mail has no boundary, cannot be split as expected or its attachment
# cannot be converted, send the original stream
if [ -n "$BOUNDARY" ] && split_mail && convert_attachment; then
  if transcribe_voicemail; then
    if [ -z "$FILTERED" ]; then
      TRANSCRIPT="(AWS was unable to recognize any speech in audio data.)"
    else
      TRANSCRIPT=$FILTERED
    fi
  else
    echo "sendmail-aws: transcription failed, sending mail without transcript" >&2
    TRANSCRIPT="(AWS transcription failed or did not finish in time.)"
  fi
  assemble_mail
else
  mv stream.org stream.new
fi

# send the mail thru sendmail
sendmail -t < stream.new
MAIL_STATUS=$?

# the EXIT trap goes back to the original directory and removes the
# temporary directory and the files uploaded to s3
exit "$MAIL_STATUS"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment