Last active
January 11, 2021 11:55
-
-
Save darrenjrobinson/a203b820dbe359499d58bcec3e2a5abd to your computer and use it in GitHub Desktop.
Azure Cognitive Services Text to Speech (MP3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Text to Speech w/ Azure Cognitive Services
#
# Flow of this script:
#   1. Exchange the subscription key for an access token (issueToken endpoint).
#   2. POST an SSML document to the synthesize endpoint and save the response as an MP3.
#   3. Open the MP3 with the WPF MediaPlayer, play it, and close the player afterwards.

# Abort on the first failure. Without this a failed web request only ends that one
# statement and the script would go on to play a stale or missing file.
$ErrorActionPreference = 'Stop'

# Windows PowerShell may default to older TLS versions; make sure TLS 1.2 is enabled
# for the HTTPS calls below (existing protocols are kept).
[Net.ServicePointManager]::SecurityProtocol = [Net.ServicePointManager]::SecurityProtocol -bor [Net.SecurityProtocolType]::Tls12

$txt2SpeechTokenURI = "https://api.cognitive.microsoft.com/sts/v1.0/issueToken"

# Subscription key. If the AZURE_SPEECH_KEY environment variable is set it is used,
# so the key does not have to be stored in this file; otherwise the placeholder below
# applies and must be replaced with a real key.
$key1 = if ($env:AZURE_SPEECH_KEY) { $env:AZURE_SPEECH_KEY } else { "yourAPIKey" }

# Generate Request Auth Headers
$TokenHeaders = @{"Ocp-Apim-Subscription-Key" = $key1;
                  "Content-Length"= "0";
                  "Content-type" = "application/x-www-form-urlencoded"}

# Get OAuth Token (the response body is the token itself)
$OAuthToken = Invoke-RestMethod -Method POST -Uri $txt2SpeechTokenURI -Headers $TokenHeaders

# Output Settings - Audio Player and Filename and Path
Add-Type -AssemblyName presentationCore
$mediaPlayer = New-Object system.windows.media.mediaplayer

# Output Path
$audioPath = "C:\temp\"
# Output File
$audiofile = "audiooutexample.mp3"

# Create the output folder when it is missing so -OutFile cannot fail on the path,
# and build the full file name once instead of concatenating strings in several places.
if (-not (Test-Path -LiteralPath $audioPath -PathType Container)) {
    New-Item -ItemType Directory -Path $audioPath -Force | Out-Null
}
$audioFullPath = Join-Path -Path $audioPath -ChildPath $audiofile

# Text to Speech Endpoint
# NOTE(review): this is the Bing Speech synthesize endpoint; confirm it is still the
# right endpoint for your subscription/region before relying on it.
$URI = "https://speech.platform.bing.com/synthesize"

# Headers
# The access token is sent using the "Bearer" authorization scheme.
$headers = @{"Ocp-Apim-Subscription-Key" = $key1;
             "Content-Type" = "application/ssml+xml";
             "X-Microsoft-OutputFormat" = "audio-16khz-32kbitrate-mono-mp3";
             "User-Agent" = "MIMText2Speech";
             "Authorization" = "Bearer $OAuthToken"}

# Output formats
#ssml-16khz-16bit-mono-tts
#raw-16khz-16bit-mono-pcm
#audio-16khz-16kbps-mono-siren
#riff-16khz-16kbps-mono-siren
#riff-16khz-16bit-mono-pcm
#audio-16khz-128kbitrate-mono-mp3
#audio-16khz-64kbitrate-mono-mp3
#audio-16khz-32kbitrate-mono-mp3

# Voices https://docs.microsoft.com/en-us/azure/cognitive-services/speech/api-reference-rest/bingvoiceoutput#SupLocales
#Microsoft Server Speech Text to Speech Voice (en-US, JessaRUS)
#Microsoft Server Speech Text to Speech Voice (en-GB, Susan, Apollo)
#Microsoft Server Speech Text to Speech Voice (en-AU, HayleyRUS)

# SSML template; TEXTTOCONVERT is a placeholder that is replaced below.
[xml]$Voice = @'
<speak version='1.0' xmlns="http://www.w3.org/2001/10/synthesis" xml:lang='en-US'>
<voice name='Microsoft Server Speech Text to Speech Voice (en-AU, HayleyRUS)'>
TEXTTOCONVERT
</voice>
</speak>
'@

# Inject text to convert (assigning through the XML document keeps the text XML-escaped)
$Voice.speak.voice.'#text' = "I just converted this string to speech using Azure Cognitive Services"
# Echo the text that is about to be synthesized
$Voice.speak.voice.'#text'

# Send request for conversion; the audio in the response is written straight to the output file.
# The XML document itself is passed as the body so the cmdlet serializes it.
Invoke-RestMethod -Method POST -Uri $URI -Headers $headers -Body $Voice -ContentType "application/ssml+xml" -OutFile $audioFullPath

# Open the file and play it. Invoke-RestMethod has already finished writing the file
# when it returns, so no delay is needed before opening it.
$mediaPlayer.Open([uri]$audioFullPath)
try {
    # The duration only becomes available once the media has been loaded. Poll for it
    # (max. 10 seconds) instead of sleeping a fixed time and hoping it is ready;
    # reading .TimeSpan while HasTimeSpan is false throws.
    $waitedMs = 0
    while (-not $mediaPlayer.NaturalDuration.HasTimeSpan -and $waitedMs -lt 10000) {
        Start-Sleep -Milliseconds 100
        $waitedMs += 100
    }
    if (-not $mediaPlayer.NaturalDuration.HasTimeSpan) {
        throw "Could not determine the duration of '$audioFullPath'; the file may not contain playable audio."
    }

    $responseDuration = $mediaPlayer.NaturalDuration.TimeSpan.TotalSeconds
    $mediaPlayer.Play()

    # Wait for playback to finish. Milliseconds (rounded up) are used because
    # -Seconds takes a whole number in Windows PowerShell, which could cut off the
    # end of the clip.
    Start-Sleep -Milliseconds ([int][math]::Ceiling($responseDuration * 1000))
}
finally {
    # Always release the media file, even when playback failed.
    $mediaPlayer.Close()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment