Last active
November 8, 2022 17:10
-
-
Save dichternebel/ee891d703c537faf17d15447011bffc9 to your computer and use it in GitHub Desktop.
PowerShell script to use Azure Speech Service TTS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#
# PowerShell script that uses Azure Speech Service TTS to play the given string argument directly as a sound file
#
# Author: https://github.com/dichternebel
# Docs: https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=nonstreaming#convert-text-to-speech
# Test voices: https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/#overview
# Settings -- adjust these values to your own Azure Speech resource.

# Paste your Azure Speech key here.
$subscriptionKey = 'YourVerySecretKeyGoesHere'

# Change this to match the region of your Azure Speech service.
$azureRegion = 'westeurope'

# Voice to use; available voices are listed at
# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=stt-tts
$voice = 'de-DE-AmalaNeural'

# Increases/decreases the speaking tempo by the given percentage.
$tempo = '20%'

# Shifts the pitch of the voice by the given percentage.
$pitch = '-10%'
#
# No need to change anything below this line!
#
# The text to speak is the first script argument; wrap sentences in quotes
# so that they arrive as a single argument.
$Content = $args[0]
$textMissing = [string]::IsNullOrEmpty($Content)
if ($textMissing) {
    # No text was passed: switch to a different voice/tempo/pitch and speak a
    # built-in message telling the user to supply an argument.
    $voice, $tempo, $pitch = 'de-DE-GiselaNeural', '10%', '0%'
    $Content = 'Los gib Argument, du Kacknuub und mach mal was Geiles, Digger!'
}
# Auth: exchange the subscription key for an access token that is sent with
# the synthesis request further down.
$FetchTokenHeader = @{
    'Content-type'              = 'application/x-www-form-urlencoded'
    'Content-Length'            = '0'
    'Ocp-Apim-Subscription-Key' = $subscriptionKey
}
$TokenUri = "https://$azureRegion.api.cognitive.microsoft.com/sts/v1.0/issuetoken"
try {
    $OAuthToken = Invoke-RestMethod -Method POST -Uri $TokenUri -Headers $FetchTokenHeader -ErrorAction Stop
}
catch {
    # Without a token the synthesis request cannot be authorized, so stop here
    # with a clear message instead of carrying on with an empty token.
    Write-Error "Could not fetch an access token from ${TokenUri}: $($_.Exception.Message)"
    exit 1
}
# Audio format requested from the service; supported values are listed at
# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=nonstreaming#audio-outputs
$AudioOutputType = 'riff-48khz-16bit-mono-pcm'

# Prepare REST call
$Uri = "https://$azureRegion.tts.speech.microsoft.com/cognitiveservices/v1"
$Headers = @{
    'X-Microsoft-OutputFormat' = $AudioOutputType
    'User-Agent'               = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1'
    # The bearer scheme is "Bearer <token>" -- no colon after "Bearer".
    'Authorization'            = "Bearer $OAuthToken"
    'Host'                     = "$azureRegion.tts.speech.microsoft.com"
}

# The text is embedded in an XML (SSML) document, so XML special characters
# (& < > " ') must be escaped or the request body becomes malformed.
$EscapedContent = [System.Security.SecurityElement]::Escape($Content)

# SSML payload: the text wrapped in the configured voice and prosody (tempo/pitch).
$Body = @"
<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">
    <voice name="$voice">
        <prosody rate="$tempo" pitch="$pitch">
            $EscapedContent
        </prosody>
    </voice>
</speak>
"@
# Prepare output file: a timestamped WAV next to this script.
$timestamp = Get-Date -Format FileDateTime
$FilePath = "$PSScriptRoot/output_$timestamp.wav"

$Soundplayer = $null
try {
    # Post payload and get WAV file
    # Hint: If you do not set the ContentType here, then e.g. umlauts will not work!
    Invoke-RestMethod -Uri $Uri -Method POST -Headers $Headers -ContentType 'application/ssml+xml; charset=utf-8' -Body $Body -OutFile $FilePath -ErrorAction Stop

    # Play it; PlaySync blocks until playback has finished, so the file is
    # not deleted while it is still being played.
    $Soundplayer = New-Object System.Media.SoundPlayer
    $Soundplayer.SoundLocation = $FilePath
    $Soundplayer.PlaySync()
}
finally {
    # Clean up the mess -- this runs whether or not the request or the
    # playback succeeded, so no stray WAV file is left behind.
    if ($null -ne $Soundplayer) {
        $Soundplayer.Dispose()
    }
    if (Test-Path -LiteralPath $FilePath) {
        Remove-Item -LiteralPath $FilePath
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment