dichternebel · November 8, 2022 17:10
diff --git a/AzureTTS.ps1 b/AzureTTS.ps1
 #
 # PowerShell script that uses Azure Speech Service TTS to synthesize the given string argument and play it back directly as a sound file
 #
 # Author:      https://github.com/dichternebel
 # Docs:        https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=nonstreaming#convert-text-to-speech
 # Test voices: https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/#overview

 # Settings: adjust these to match your Azure Speech resource and preferred voice.
 $subscriptionKey = 'YourVerySecretKeyGoesHere' # paste your Azure Speech key here
 $azureRegion = 'westeurope' # Change this to match the region of your Azure Speech service
 $voice = 'de-DE-AmalaNeural' # https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=stt-tts
 $tempo = '20%' # increases/decreases the tempo by given percentage
 $pitch = '-10%' # pitches the voice by given percentage
 #
 # no need to change things below this line!
 #

 # Text to speak. All script arguments are joined with single spaces, so a
 # sentence is spoken completely whether or not the caller wrapped it in quotes.
 $Content = $args -join ' '
 if ([string]::IsNullOrWhiteSpace($Content)) {
   # No usable text was supplied (nothing, or only whitespace): switch to the
   # fallback voice settings and speak a built-in reminder instead.
   $voice = 'de-DE-GiselaNeural'
   $tempo = '10%'
   $pitch = '0%'
   $Content = 'Los gib Argument, du Kacknuub und mach mal was Geiles, Digger!'
 }

 # Auth: POST to the region's token endpoint and keep the response in $OAuthToken.
 # No request body is sent; the subscription key travels in the
 # Ocp-Apim-Subscription-Key header.
 $tokenEndpoint = "https://$azureRegion.api.cognitive.microsoft.com/sts/v1.0/issuetoken"

 $tokenRequestHeaders = @{}
 $tokenRequestHeaders['Content-type'] = 'application/x-www-form-urlencoded'
 $tokenRequestHeaders['Content-Length'] = '0'
 $tokenRequestHeaders['Ocp-Apim-Subscription-Key'] = $subscriptionKey

 $OAuthToken = Invoke-RestMethod -Uri $tokenEndpoint -Method POST -Headers $tokenRequestHeaders

 # Audio format requested from the service. Supported values are listed at
 # https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=nonstreaming#audio-outputs
 $AudioOutputType = 'riff-48khz-16bit-mono-pcm'

 # Prepare REST call: regional text-to-speech endpoint.
 $Uri = "https://$azureRegion.tts.speech.microsoft.com/cognitiveservices/v1"

 # Request headers. The Authorization value is the scheme name and the token
 # separated by a single space ("Bearer <token>"); there is no colon after "Bearer".
 $Headers = @{
   'X-Microsoft-OutputFormat' = $AudioOutputType;
   'User-Agent' = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1';
   'Authorization' = "Bearer $OAuthToken";
   'Host' = "$azureRegion.tts.speech.microsoft.com"
 }

 # Build the SSML request body.
 # The text is XML-escaped first so that characters such as &, <, > or quotes
 # in the argument cannot break the document or inject markup.
 $EscapedContent = [System.Security.SecurityElement]::Escape([string]$Content)

 # Assembled from an array of lines instead of a here-string: a here-string
 # terminator has to be the first thing on its line, which an indented block
 # cannot guarantee.
 $Body = @(
   '<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">'
   "  <voice name=`"$voice`">"
   "    <prosody rate=`"$tempo`" pitch=`"$pitch`">"
   "      $EscapedContent"
   '    </prosody>'
   '  </voice>'
   '</speak>'
 ) -join "`n"

 # Prepare output file: a timestamped WAV next to the script.
 $timestamp = Get-Date -Format FileDateTime
 $FilePath = "$PSScriptRoot/output_$timestamp.wav"

 # Initialised up front so the finally block can test it even if creation never happened.
 $Soundplayer = $null
 try {
   # Post payload and get WAV file
   # Hint: If you do not set the ContentType here, then e.g. umlauts will not work!
   Invoke-RestMethod -Uri $Uri -Method POST -Headers $Headers -ContentType 'application/ssml+xml; charset=utf-8' -Body $Body -OutFile $FilePath

   # Play it; PlaySync blocks until playback has finished, so the file can be
   # deleted safely afterwards.
   $Soundplayer = New-Object System.Media.SoundPlayer
   $Soundplayer.SoundLocation = $FilePath
   $Soundplayer.PlaySync()
 }
 finally {
   # Clean up the mess, also when the request or the playback failed.
   if ($null -ne $Soundplayer) {
     $Soundplayer.Dispose()
   }
   # -LiteralPath keeps characters like [ ] in the script folder from being
   # treated as wildcards.
   if (Test-Path -LiteralPath $FilePath) {
     Remove-Item -LiteralPath $FilePath
   }
 }
	#
	# PowerShell script that uses Azure Speech Service TTS to synthesize the given string argument and play it back directly as a sound file
	#
	# Author: https://github.com/dichternebel
	# Docs: https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=nonstreaming#convert-text-to-speech
	# Test voices: https://azure.microsoft.com/en-us/products/cognitive-services/text-to-speech/#overview

	# Settings: adjust these to match your Azure Speech resource and preferred voice.
	$subscriptionKey = 'YourVerySecretKeyGoesHere' # paste your Azure Speech key here
	$azureRegion = 'westeurope' # Change this to match the region of your Azure Speech service
	$voice = 'de-DE-AmalaNeural' # https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support?tabs=stt-tts
	$tempo = '20%' # increases/decreases the tempo by given percentage
	$pitch = '-10%' # pitches the voice by given percentage
	#
	# no need to change things below this line!
	#

	# Text to speak. All script arguments are joined with single spaces, so a
	# sentence is spoken completely whether or not the caller wrapped it in quotes.
	$Content = $args -join ' '
	if ([string]::IsNullOrWhiteSpace($Content)) {
		# No usable text was supplied (nothing, or only whitespace): switch to the
		# fallback voice settings and speak a built-in reminder instead.
		$voice = 'de-DE-GiselaNeural'
		$tempo = '10%'
		$pitch = '0%'
		$Content = 'Los gib Argument, du Kacknuub und mach mal was Geiles, Digger!'
	}

	# Auth: POST to the region's token endpoint and keep the response in $OAuthToken.
	# No request body is sent; the subscription key travels in the
	# Ocp-Apim-Subscription-Key header.
	$tokenEndpoint = "https://$azureRegion.api.cognitive.microsoft.com/sts/v1.0/issuetoken"

	$tokenRequestHeaders = @{}
	$tokenRequestHeaders['Content-type'] = 'application/x-www-form-urlencoded'
	$tokenRequestHeaders['Content-Length'] = '0'
	$tokenRequestHeaders['Ocp-Apim-Subscription-Key'] = $subscriptionKey

	$OAuthToken = Invoke-RestMethod -Uri $tokenEndpoint -Method POST -Headers $tokenRequestHeaders

	# Audio format requested from the service. Supported values are listed at
	# https://learn.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech?tabs=nonstreaming#audio-outputs
	$AudioOutputType = 'riff-48khz-16bit-mono-pcm'

	# Prepare REST call: regional text-to-speech endpoint.
	$Uri = "https://$azureRegion.tts.speech.microsoft.com/cognitiveservices/v1"

	# Request headers. The Authorization value is the scheme name and the token
	# separated by a single space ("Bearer <token>"); there is no colon after "Bearer".
	$Headers = @{
		'X-Microsoft-OutputFormat' = $AudioOutputType;
		'User-Agent' = 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:15.0) Gecko/20100101 Firefox/15.0.1';
		'Authorization' = "Bearer $OAuthToken";
		'Host' = "$azureRegion.tts.speech.microsoft.com"
	}

	# Build the SSML request body.
	# The text is XML-escaped first so that characters such as &, <, > or quotes
	# in the argument cannot break the document or inject markup.
	$EscapedContent = [System.Security.SecurityElement]::Escape([string]$Content)

	# Assembled from an array of lines instead of a here-string: a here-string
	# terminator has to be the first thing on its line, which an indented block
	# cannot guarantee.
	$Body = @(
		'<speak xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="http://www.w3.org/2001/mstts" xmlns:emo="http://www.w3.org/2009/10/emotionml" version="1.0" xml:lang="en-US">'
		"  <voice name=`"$voice`">"
		"    <prosody rate=`"$tempo`" pitch=`"$pitch`">"
		"      $EscapedContent"
		'    </prosody>'
		'  </voice>'
		'</speak>'
	) -join "`n"

	# Prepare output file: a timestamped WAV next to the script.
	$timestamp = Get-Date -Format FileDateTime
	$FilePath = "$PSScriptRoot/output_$timestamp.wav"

	# Initialised up front so the finally block can test it even if creation never happened.
	$Soundplayer = $null
	try {
		# Post payload and get WAV file
		# Hint: If you do not set the ContentType here, then e.g. umlauts will not work!
		Invoke-RestMethod -Uri $Uri -Method POST -Headers $Headers -ContentType 'application/ssml+xml; charset=utf-8' -Body $Body -OutFile $FilePath

		# Play it; PlaySync blocks until playback has finished, so the file can be
		# deleted safely afterwards.
		$Soundplayer = New-Object System.Media.SoundPlayer
		$Soundplayer.SoundLocation = $FilePath
		$Soundplayer.PlaySync()
	}
	finally {
		# Clean up the mess, also when the request or the playback failed.
		if ($null -ne $Soundplayer) {
			$Soundplayer.Dispose()
		}
		# -LiteralPath keeps characters like [ ] in the script folder from being
		# treated as wildcards.
		if (Test-Path -LiteralPath $FilePath) {
			Remove-Item -LiteralPath $FilePath
		}
	}