Created
July 17, 2014 15:42
-
-
Save ceee/447f567c8467c7d9b3f4 to your computer and use it in GitHub Desktop.
Memory efficient TTS background audio in WinRT
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Newtonsoft.Json; | |
using Poki.Utilities; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.IO; | |
using System.Text; | |
using System.Threading; | |
using System.Threading.Tasks; | |
using System.Xml.Linq; | |
using Windows.ApplicationModel.Background; | |
using Windows.Foundation.Collections; | |
using Windows.Media; | |
using Windows.Media.Playback; | |
using Windows.Media.SpeechSynthesis; | |
using System.Linq; | |
using Windows.Foundation; | |
namespace Poki.AudioTask | |
{ | |
/// <summary>
/// Last known lifecycle state of the foreground app, as tracked by the background audio task.
/// </summary>
internal enum ForegroundAppStatus
{
    /// <summary>The foreground app reported that it is running (resumed).</summary>
    Active = 0,

    /// <summary>The foreground app reported that it was suspended.</summary>
    Suspended = 1,

    /// <summary>No state has been reported or restored yet.</summary>
    Unknown = 2
}
/// <summary>
/// Background audio task that plays synthesized speech through <see cref="BackgroundMediaPlayer"/>.
/// The text is supplied by the foreground app as a JSON-serialized <see cref="AudioData"/> message and is
/// played chunk by chunk: every time the player raises MediaEnded the next chunk is requested from the
/// current <see cref="AudioStream"/>.
/// </summary>
public sealed class AudioBackgroundTask : IBackgroundTask
{
    #region Private fields, properties

    // System media transport controls (the "UVC") used to show track info and receive play/pause presses.
    private SystemMediaTransportControls systemmediatransportcontrol;

    // Deferral obtained at the end of Run(); completing it signals that the task is done.
    private BackgroundTaskDeferral deferral;

    // Last known state of the foreground app; restored from settings in Run() and updated by messages.
    private ForegroundAppStatus foregroundAppState = ForegroundAppStatus.Unknown;

    // Signalled by Run() so that a play request arriving during (re-)initialization can wait for it.
    private readonly AutoResetEvent BackgroundTaskStarted = new AutoResetEvent(false);

    // True between the end of Run() and cancellation.
    private bool backgroundtaskrunning = false;

    // Content currently being read; null until the foreground app sends a content message.
    private AudioStream stream;

    #endregion

    #region IBackgroundTask and IBackgroundTaskInstance Interface Members and handlers

    /// <summary>
    /// The Run method is the entry point of a background task. It configures the transport controls,
    /// wires up the task, media player and message handlers, notifies the foreground app (unless it is
    /// known to be suspended) and finally takes a deferral so the task stays alive.
    /// </summary>
    /// <param name="taskInstance">The task instance supplied by the system.</param>
    public void Run(IBackgroundTaskInstance taskInstance)
    {
        Debug.WriteLine("audio: starting");

        // Initialize SMTC object to talk with UVC.
        // Note that this is intended to run after the app is paused and
        // hence all the logic must be written to run in the background process.
        systemmediatransportcontrol = SystemMediaTransportControls.GetForCurrentView();
        systemmediatransportcontrol.ButtonPressed += systemmediatransportcontrol_ButtonPressed;
        systemmediatransportcontrol.IsEnabled = true;
        systemmediatransportcontrol.IsPauseEnabled = true;
        systemmediatransportcontrol.IsPlayEnabled = true;
        systemmediatransportcontrol.IsNextEnabled = false;
        systemmediatransportcontrol.IsPreviousEnabled = false;

        // Associate cancellation and completed handlers with the background task.
        taskInstance.Canceled += new BackgroundTaskCanceledEventHandler(OnCanceled);
        taskInstance.Task.Completed += Taskcompleted;

        // Restore the last persisted foreground state. A missing or unparsable value falls back to
        // Unknown instead of throwing out of Run().
        var value = SettingsHelper.ReadResetSettingsValue(AudioConstants.AppState);
        ForegroundAppStatus restoredState;
        if (value != null && Enum.TryParse(value.ToString(), out restoredState))
        {
            foregroundAppState = restoredState;
        }
        else
        {
            foregroundAppState = ForegroundAppStatus.Unknown;
        }

        // Add handlers for MediaPlayer
        BackgroundMediaPlayer.Current.CurrentStateChanged += Current_CurrentStateChanged;
        BackgroundMediaPlayer.Current.MediaEnded += Current_MediaEnded;

        // Initialize message channel
        BackgroundMediaPlayer.MessageReceivedFromForeground += BackgroundMediaPlayer_MessageReceivedFromForeground;

        // Send information to foreground that background task has been started if app is active
        if (foregroundAppState != ForegroundAppStatus.Suspended)
        {
            ValueSet message = new ValueSet();
            message.Add(AudioConstants.BackgroundTaskStarted, "");
            BackgroundMediaPlayer.SendMessageToForeground(message);
        }

        BackgroundTaskStarted.Set();
        backgroundtaskrunning = true;
        SettingsHelper.SaveSettingsValue(AudioConstants.BackgroundTaskState, AudioConstants.BackgroundTaskRunning);
        deferral = taskInstance.GetDeferral();
    }

    /// <summary>
    /// Indicate that the background task is completed.
    /// </summary>
    void Taskcompleted(BackgroundTaskRegistration sender, BackgroundTaskCompletedEventArgs args)
    {
        Debug.WriteLine("MyBackgroundAudioTask " + sender.TaskId + " Completed...");

        // The deferral is assigned on the last line of Run(); guard against being called before that,
        // the same way OnCanceled does.
        if (deferral != null)
        {
            deferral.Complete();
        }
    }

    /// <summary>
    /// Handles background task cancellation. Task cancellation happens due to :
    /// 1. Another Media app comes into foreground and starts playing music
    /// 2. Resource pressure. Your task is consuming more CPU and memory than allowed.
    /// In either case, save state so that if foreground app resumes it can know where to start.
    /// </summary>
    private void OnCanceled(IBackgroundTaskInstance sender, BackgroundTaskCancellationReason reason)
    {
        // You get some time here to save your state before process and resources are reclaimed
        Debug.WriteLine("MyBackgroundAudioTask " + sender.Task.TaskId + " Cancel Requested...");
        try
        {
            // save state
            SettingsHelper.SaveSettingsValue(AudioConstants.Position, BackgroundMediaPlayer.Current.Position.ToString());
            SettingsHelper.SaveSettingsValue(AudioConstants.BackgroundTaskState, AudioConstants.BackgroundTaskCancelled);
            SettingsHelper.SaveSettingsValue(AudioConstants.AppState, Enum.GetName(typeof(ForegroundAppStatus), foregroundAppState));
            backgroundtaskrunning = false;

            // Unsubscribe every handler registered in Run(). MessageReceivedFromForeground is a static
            // event, so a handler left attached would keep this cancelled instance reachable.
            systemmediatransportcontrol.ButtonPressed -= systemmediatransportcontrol_ButtonPressed;
            BackgroundMediaPlayer.MessageReceivedFromForeground -= BackgroundMediaPlayer_MessageReceivedFromForeground;
            BackgroundMediaPlayer.Current.CurrentStateChanged -= Current_CurrentStateChanged;
            BackgroundMediaPlayer.Current.MediaEnded -= Current_MediaEnded;

            BackgroundMediaPlayer.Shutdown(); // shutdown media pipeline
        }
        catch (Exception ex)
        {
            Debug.WriteLine(ex.ToString());
        }

        if (deferral != null)
        {
            deferral.Complete(); // signals task completion.
        }
        Debug.WriteLine("MyBackgroundAudioTask Cancel complete...");
    }

    #endregion

    #region SysteMediaTransportControls related functions and handlers

    /// <summary>
    /// Update UVC using SystemMediaTransportControls APIs: marks playback as playing and shows the
    /// title of the current content with the fixed artist name "Poki".
    /// </summary>
    private void UpdateUVCOnNewTrack()
    {
        systemmediatransportcontrol.PlaybackStatus = MediaPlaybackStatus.Playing;
        systemmediatransportcontrol.DisplayUpdater.Type = MediaPlaybackType.Music;
        systemmediatransportcontrol.DisplayUpdater.MusicProperties.Title = stream.Data.Title;
        systemmediatransportcontrol.DisplayUpdater.MusicProperties.Artist = "Poki";
        systemmediatransportcontrol.DisplayUpdater.Update();
    }

    /// <summary>
    /// This function controls the button events from UVC.
    /// This code, if not run in the background process, will not be able to handle button pressed
    /// events when the app is suspended.
    /// </summary>
    /// <param name="sender">The transport controls raising the event.</param>
    /// <param name="args">Identifies the pressed button.</param>
    private void systemmediatransportcontrol_ButtonPressed(SystemMediaTransportControls sender, SystemMediaTransportControlsButtonPressedEventArgs args)
    {
        switch (args.Button)
        {
            case SystemMediaTransportControlsButton.Play:
                Debug.WriteLine("audio: UVC play button pressed");
                bool success = false;
                try
                {
                    BackgroundMediaPlayer.Current.Play();
                    success = true;
                }
                catch (Exception ex)
                {
                    // Not fatal: the fallback below re-creates the stream source and retries.
                    Debug.WriteLine("audio: resuming playback failed - " + ex.ToString());
                }

                // If music is in paused state for a period of more than 5 minutes,
                // the app will get task cancellation and it cannot run code.
                // However, the user can still play music by pressing play via UVC unless a new app
                // comes in and clears UVC. When this happens, the task gets re-initialized and that
                // is asynchronous, hence the wait.
                if (!success)
                {
                    if (!backgroundtaskrunning)
                    {
                        bool started = BackgroundTaskStarted.WaitOne(2000);
                        if (!started)
                        {
                            throw new TimeoutException("audio: Background Task didnt initialize in time");
                        }
                    }
                    StartPlayback();
                }
                break;

            case SystemMediaTransportControlsButton.Pause:
                Debug.WriteLine("audio: UVC pause button pressed");
                try
                {
                    BackgroundMediaPlayer.Current.Pause();
                }
                catch (Exception ex)
                {
                    Debug.WriteLine(ex.ToString());
                }
                break;
        }
    }

    #endregion

    #region Playlist management functions and handlers

    /// <summary>
    /// Starts the playback from the current position: requests the next chunk from the current
    /// stream, hands it to the media player and refreshes the UVC. Does nothing when no content has
    /// been set or when the content has been read completely.
    /// </summary>
    private async void StartPlayback()
    {
        if (stream == null)
        {
            return;
        }

        try
        {
            // StreamNext() returns null (not a completed operation) once all content has been read;
            // awaiting that null would throw, so check it explicitly.
            IAsyncOperation<SpeechSynthesisStream> pending = stream.StreamNext();
            if (pending == null)
            {
                Debug.WriteLine("audio: no more content to play");
                return;
            }

            SpeechSynthesisStream synthStream = await pending;
            if (synthStream != null)
            {
                BackgroundMediaPlayer.Current.SetStreamSource(synthStream);
                BackgroundMediaPlayer.Current.Play();
                UpdateUVCOnNewTrack();
            }
        }
        catch (Exception ex)
        {
            // async void: an escaping exception could not be observed by any caller, so it is
            // contained here - but logged rather than silently dropped.
            Debug.WriteLine("audio: starting playback failed - " + ex.ToString());
        }
    }

    /// <summary>
    /// Fires when the currently played media ended
    /// and resumes with the next text part or stops (when no part is available).
    /// </summary>
    private void Current_MediaEnded(MediaPlayer sender, object args)
    {
        StartPlayback();
    }

    #endregion

    #region Background Media Player Handlers

    /// <summary>
    /// Mirrors the media player's Playing/Paused state to the transport controls; other states are ignored.
    /// </summary>
    void Current_CurrentStateChanged(MediaPlayer sender, object args)
    {
        if (sender.CurrentState == MediaPlayerState.Playing)
        {
            systemmediatransportcontrol.PlaybackStatus = MediaPlaybackStatus.Playing;
        }
        else if (sender.CurrentState == MediaPlayerState.Paused)
        {
            systemmediatransportcontrol.PlaybackStatus = MediaPlaybackStatus.Paused;
        }
    }

    /// <summary>
    /// Fires when a message is received from the foreground app. Every key of the message is treated
    /// as a command; keys are matched case-insensitively against the AudioConstants values.
    /// </summary>
    /// <param name="sender">Unused.</param>
    /// <param name="e">Carries the message key/value pairs.</param>
    void BackgroundMediaPlayer_MessageReceivedFromForeground(object sender, MediaPlayerDataReceivedEventArgs e)
    {
        foreach (string key in e.Data.Keys)
        {
            Debug.WriteLine("audio: message received - " + key);

            // Invariant lower-casing: these are protocol identifiers, not user-facing text, so the
            // match must not depend on the device culture.
            switch (key.ToLowerInvariant())
            {
                // set new content/article
                case AudioConstants.Content:
                    // Look the payload up with the key as it was actually sent; it may differ in
                    // casing from the constant.
                    LoadContent(e.Data[key]);
                    break;

                // in case the app was suspended
                case AudioConstants.AppSuspended:
                    foregroundAppState = ForegroundAppStatus.Suspended;
                    break;

                // restart playback from the beginning (only while something is playing)
                case AudioConstants.RestartPlayback:
                    if (stream != null)
                    {
                        stream.ResetPosition();
                        if (BackgroundMediaPlayer.Current.CurrentState == MediaPlayerState.Playing)
                        {
                            BackgroundMediaPlayer.Current.Pause();
                            BackgroundMediaPlayer.Current.Position = TimeSpan.Zero;
                            StartPlayback();
                        }
                    }
                    break;

                // stop playback and reset position
                case AudioConstants.StopPlayback:
                    if (stream != null)
                    {
                        stream.ResetPosition();
                    }
                    if (BackgroundMediaPlayer.Current.CanPause)
                    {
                        BackgroundMediaPlayer.Current.Pause();
                        BackgroundMediaPlayer.Current.Position = TimeSpan.Zero;
                    }
                    break;

                // in case the app was resumed
                case AudioConstants.AppResumed:
                    foregroundAppState = ForegroundAppStatus.Active;
                    break;

                // start playback
                case AudioConstants.StartPlayback:
                    StartPlayback();
                    break;
            }
        }
    }

    /// <summary>
    /// Replaces the current stream with one built from a JSON-serialized AudioData payload.
    /// A missing, non-string, malformed or content-less payload is logged and ignored, leaving the
    /// previous stream in place.
    /// </summary>
    /// <param name="payload">The raw message value sent by the foreground app.</param>
    private void LoadContent(object payload)
    {
        string json = payload as string;
        if (String.IsNullOrEmpty(json))
        {
            Debug.WriteLine("audio: content message without a JSON payload ignored");
            return;
        }

        try
        {
            AudioData data = JsonConvert.DeserializeObject<AudioData>(json);
            if (data == null || data.Content == null)
            {
                Debug.WriteLine("audio: content message without text ignored");
                return;
            }
            stream = new AudioStream(data);
        }
        catch (JsonException ex)
        {
            Debug.WriteLine("audio: content message could not be parsed - " + ex.ToString());
        }
    }

    #endregion
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Newtonsoft.Json; | |
using Windows.Media.SpeechSynthesis; | |
namespace Poki.AudioTask | |
{ | |
/// <summary>
/// Data transfer object describing one piece of text to be read aloud.
/// It is exchanged as JSON between the foreground app and the background audio task.
/// </summary>
[JsonObject]
public sealed class AudioData
{
    /// <summary>Gets or sets the text that is split into words and synthesized.</summary>
    public string Content { get; set; }

    /// <summary>Gets or sets the Id of the voice to use; compared against the Ids of the installed voices.</summary>
    public string VoiceID { get; set; }

    /// <summary>Gets or sets the language requested by the sender.</summary>
    public string DesiredLanguage { get; set; }

    /// <summary>Gets or sets the title shown in the transport controls and read before the first part.</summary>
    public string Title { get; set; }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using Poki.Utilities; | |
using System; | |
using System.Collections.Generic; | |
using System.Diagnostics; | |
using System.Linq; | |
using System.Threading.Tasks; | |
using System.Xml.Linq; | |
using Windows.Foundation; | |
using Windows.Media.SpeechSynthesis; | |
namespace Poki.AudioTask | |
{ | |
/// <summary>
/// Turns the text of an <see cref="AudioData"/> into a sequence of speech streams, one chunk of
/// words at a time, so that only a small part of the text has to be synthesized at once.
/// </summary>
public sealed class AudioStream
{
    private static readonly string[] ParagraphSeparators = { "\r\n", "\n" };

    /// <summary>Gets or sets the data (text, title, voice) this stream reads from.</summary>
    public AudioData Data { get; set; }

    private readonly SpeechSynthesizer Synthesizer;

    // Index into contentParts of the first word that has not been handed out yet.
    private int currentPosition = 0;

    // The content split on single spaces; line breaks stay attached to the words around them.
    private readonly string[] contentParts;

    /// <summary>
    /// Creates a new instance of the AudioStream. The voice whose Id equals <c>data.VoiceID</c> is
    /// used when installed, otherwise the default voice. A null <c>Content</c> is treated as empty text.
    /// </summary>
    /// <param name="data">Text, title and voice information; must not be null.</param>
    public AudioStream(AudioData data)
    {
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        Data = data;
        Synthesizer = new SpeechSynthesizer();
        Synthesizer.Voice = SpeechSynthesizer.AllVoices.FirstOrDefault(item => item.Id == data.VoiceID) ?? SpeechSynthesizer.DefaultVoice;
        contentParts = (data.Content ?? String.Empty).Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
    }

    /// <summary>
    /// Resets the current position so that the next chunk starts at the first word again.
    /// </summary>
    public void ResetPosition()
    {
        currentPosition = 0;
    }

    /// <summary>
    /// Streams the next part. With <c>max = Options.AudioStreamCacheSize</c>, a chunk is closed at the
    /// first word ending in '.', '?' or '!' once it holds more than <c>max + 1</c> words, or
    /// unconditionally once it holds more than <c>2 * max</c> words. The title is prepended to the
    /// chunk that starts at position 0.
    /// </summary>
    /// <returns>
    /// The pending synthesis operation, or <c>null</c> when no words are left (the position is reset
    /// in that case). Callers must check for null before awaiting.
    /// </returns>
    public IAsyncOperation<SpeechSynthesisStream> StreamNext()
    {
        Debug.WriteLine("audio stream: " + currentPosition.ToString());
        int max = Options.AudioStreamCacheSize; // in words

        // get part of text for reading
        List<string> parts = new List<string>();
        for (int index = currentPosition; index < contentParts.Length; index++)
        {
            string word = contentParts[index];
            parts.Add(word);

            bool pastMinimum = parts.Count > max + 1;
            bool pastMaximum = parts.Count > 2 * max;
            if (pastMaximum || (pastMinimum && EndsSentence(word)))
            {
                break;
            }
        }

        // nothing left: the previous chunk was the last part
        if (parts.Count == 0)
        {
            ResetPosition();
            return null;
        }

        // The title flag must be evaluated before the position is advanced.
        IAsyncOperation<SpeechSynthesisStream> streamOperation = CreateSSMLStream(parts, Data.Title, Synthesizer.Voice, currentPosition == 0).AsAsyncOperation<SpeechSynthesisStream>();
        currentPosition = currentPosition + parts.Count;
        return streamOperation;
    }

    /// <summary>
    /// Creates the SSML for the given words and synthesizes it: one <c>p</c> element per line of
    /// text, one <c>s</c> element per sentence, optionally preceded by the title and a break.
    /// </summary>
    /// <param name="contentParts">The words of this chunk, in reading order.</param>
    /// <param name="title">Title to read first; skipped when null.</param>
    /// <param name="voice">Voice whose language is written to the <c>xml:lang</c> attribute.</param>
    /// <param name="appendTitle">Whether the title is read before the text.</param>
    /// <returns>The synthesized speech stream.</returns>
    private async Task<SpeechSynthesisStream> CreateSSMLStream(IEnumerable<string> contentParts, string title, VoiceInformation voice, bool appendTitle = true)
    {
        // split text into paragraphs
        string[] paragraphs = String.Join(" ", contentParts).Split(ParagraphSeparators, StringSplitOptions.RemoveEmptyEntries);

        XNamespace ns = "http://www.w3.org/2001/10/synthesis";

        // create root; xml:lang is written as a real namespaced attribute instead of patching the
        // serialized text, which could also have rewritten matching article content
        XElement root = new XElement(ns + "speak",
            new XAttribute("version", "1.0"),
            new XAttribute(XNamespace.Xml + "lang", voice.Language)
        );

        // append title
        if (appendTitle && title != null)
        {
            root.Add(
                new XCData(title),
                new XElement(ns + "break")
            );
        }

        // create paragraphs
        foreach (string paragraph in paragraphs)
        {
            XElement pElement = new XElement(ns + "p");
            foreach (string sentence in SplitSentences(paragraph))
            {
                pElement.Add(
                    new XElement(ns + "s",
                        new XCData(sentence)
                    )
                );
            }
            root.Add(pElement);
        }

        // create stream from XML
        return await Synthesizer.SynthesizeSsmlToStreamAsync(root.ToString());
    }

    /// <summary>
    /// Tells whether a word ends with sentence-terminating punctuation ('.', '?' or '!').
    /// </summary>
    private static bool EndsSentence(string word)
    {
        return word.EndsWith(".", StringComparison.Ordinal)
            || word.EndsWith("?", StringComparison.Ordinal)
            || word.EndsWith("!", StringComparison.Ordinal);
    }

    /// <summary>
    /// Splits a paragraph after every '.', '!' or '?' that is followed by a space. Unlike a plain
    /// String.Split on ". ", "! " and "? ", the punctuation stays part of its sentence, so it is
    /// still present in the text passed to the synthesizer.
    /// </summary>
    private static IEnumerable<string> SplitSentences(string paragraph)
    {
        int start = 0;
        for (int index = 0; index < paragraph.Length - 1; index++)
        {
            char current = paragraph[index];
            bool isTerminator = current == '.' || current == '!' || current == '?';
            if (isTerminator && paragraph[index + 1] == ' ')
            {
                string sentence = paragraph.Substring(start, index - start + 1);
                if (!String.IsNullOrWhiteSpace(sentence))
                {
                    yield return sentence;
                }
                start = index + 2; // continue after the separating space
                index++;
            }
        }

        if (start < paragraph.Length)
        {
            string rest = paragraph.Substring(start);
            if (!String.IsNullOrWhiteSpace(rest))
            {
                yield return rest;
            }
        }
    }
}
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
1.) A message is sent to the background task; it contains an `AudioData` instance with the text and the voice information.
2.) The `AudioStream` instance is created from the `AudioData`.
3.) The `AudioStream` creates the `SpeechSynthesizer` with the correct language and splits the content into words.
4.) When the start-playback message is received, the `AudioStream.StreamNext()` method is called, which generates the `SpeechSynthesisStream`.
5.) The `StreamNext` method skips as many words as are stored in `currentPosition`, takes at least `Options.AudioStreamCacheSize` words (in my case 100), and stops when it encounters sentence-ending punctuation or exceeds 200 words.
6.) The text is not passed on as plain text but as SSML (a variant of XML). It is split into paragraphs (`p`) and sentences (`s`).
7.) The SSML is converted to a stream and assigned to the `BackgroundMediaPlayer`.
8.) As soon as the media player reaches the end, the `MediaEnded` event is triggered, which calls `StreamNext()` again, but now with the new position. This is repeated until no more words are available.