Last active: March 30, 2025 18:46
Save RickStrahl/9b250c8bff67edd26b79e614b16955eb to your computer and use it in GitHub Desktop.
Windows Media Speech Recognition/Dictation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System; | |
using System.Diagnostics; | |
using System.Linq; | |
using System.Threading.Tasks; | |
using Windows.Media.SpeechRecognition; | |
using System.Windows.Controls; | |
using System.Windows.Input; | |
using Westwind.Utilities; | |
using MarkdownMonster.Windows; | |
using Windows.Globalization; | |
using System.Reflection; | |
using Windows.Foundation; | |
using System.Globalization; | |
using YamlDotNet.Core.Tokens; | |
using Key = System.Windows.Input.Key; | |
namespace MarkdownMonster.Utilities | |
{ | |
/// <summary> | |
/// Windows.Media Speech Recognition wrapper for speech dictation. | |
/// </summary> | |
public class VoiceDictation | |
{ | |
private readonly SpeechRecognizer _recognizer; | |
private bool _isCompiled; | |
private bool _isDisposed; | |
/// <summary> | |
/// Keep track of recording status | |
/// </summary> | |
public bool IsDictating { get; private set; } | |
public Key StartDictationHotKey { get; set; } = Key.F4; | |
public VoiceDictation() | |
{ | |
if (string.IsNullOrEmpty(mmApp.Configuration.VoiceDictationLanguage)) | |
_recognizer = new SpeechRecognizer(); | |
else | |
_recognizer = new SpeechRecognizer(new Language(mmApp.Configuration.VoiceDictationLanguage)); | |
var dictation = new SpeechRecognitionTopicConstraint(SpeechRecognitionScenario.Dictation, "dictation"); | |
_recognizer.Constraints.Add(dictation); | |
_recognizer.ContinuousRecognitionSession.ResultGenerated += ContinuousRecognitionSession_ResultGenerated; | |
_recognizer.ContinuousRecognitionSession.Completed += ContinuousRecognitionSession_Completed; | |
_recognizer.ContinuousRecognitionSession.AutoStopSilenceTimeout = TimeSpan.FromMinutes(1); | |
// Hook up hot keys for start and stop operations | |
Keyboard.AddKeyDownHandler(mmApp.Window, KeydownHandler); | |
GetStartDictationHotkey(); | |
} | |
private void GetStartDictationHotkey() | |
{ | |
var startKey = mmApp.Window.KeyBindings.KeyBindings | |
.Where(b => b.Id == "StartDictation") | |
.Select(b => b.Key) | |
.FirstOrDefault(); | |
if (startKey == null) return; | |
try | |
{ | |
var k = new KeyConverter(); | |
StartDictationHotKey = (Key)k.ConvertFromString(startKey); | |
} | |
catch { } | |
} | |
private void KeydownHandler(object sender, KeyEventArgs e) | |
{ | |
if (!IsDictating && e.Key == StartDictationHotKey) | |
{ | |
StartAsync().FireAndForget(); | |
Debug.WriteLine("StartAsync fired..."); | |
} | |
else if (IsDictating && e.Key == Key.Escape) | |
{ | |
Stop(); | |
Debug.WriteLine("StopAsync fired."); | |
} | |
} | |
#region Start and Stop Listening | |
/// <summary> | |
/// Starts the speech recognition session. If the recognizer is not idle, it does nothing. | |
/// Ensures the recognizer is compiled and starts continuous recognition. | |
/// </summary> | |
public async Task StartAsync(DictationListenModes listenMode = DictationListenModes.EscPressed) | |
{ | |
if (IsDictating) | |
return; | |
try | |
{ | |
await EnsureCompiledAsync(); | |
var action = _recognizer.ContinuousRecognitionSession.StartAsync(); // Runtime .AsTask() doesn't work here | |
await AsTask(action); | |
IsDictating = true; | |
mmApp.Window.Dispatcher.Invoke(() => | |
{ | |
mmApp.Window.ShowStatusProgress($"Listening for dictation ({_recognizer.CurrentLanguage.AbbreviatedName}). ESC to stop."); | |
}); | |
} | |
catch (Exception ex) when (ex.Message.Contains("privacy")) | |
{ | |
IsDictating = false; | |
// Open the settings page for speech recognition | |
ShellUtils.GoUrl("ms-settings:privacy-speech"); | |
} | |
} | |
/// <summary> | |
/// Stops the speech recognition session if it is currently running. | |
/// </summary> | |
public void Stop() | |
{ | |
mmApp.Window.Dispatcher.Delay(220, async () => | |
{ | |
if (_recognizer.State != SpeechRecognizerState.Idle) | |
{ | |
try | |
{ | |
var action = _recognizer.ContinuousRecognitionSession.StopAsync(); // .AsTask() doesn't work here | |
await AsTask(action); | |
} | |
catch (Exception ex) | |
{ | |
IsDictating = false; | |
mmApp.Window.ShowStatusError($"Couldn't stop dictation engine properly: {ex.Message}"); | |
mmApp.Log("couldn't stop dictation engine properly", ex); | |
return; | |
} | |
} | |
// always | |
IsDictating = false; | |
mmApp.Window.ShowStatusSuccess("Stopped listening for dictation."); | |
}); | |
} | |
#endregion | |
#region Result Handlers and Events | |
/// <summary> | |
/// Event handler for when speech is successfully recognized. | |
/// Simulates keyboard input based on the recognized text. | |
/// </summary> | |
private async void ContinuousRecognitionSession_ResultGenerated(SpeechContinuousRecognitionSession sender, SpeechContinuousRecognitionResultGeneratedEventArgs args) | |
{ | |
if (args.Result.Status != SpeechRecognitionResultStatus.Success) | |
return; | |
if (!mmApp.Configuration.EnableVoiceDictation) | |
return; | |
var text = args.Result?.Text; | |
if (string.IsNullOrEmpty(text)) | |
return; | |
await mmApp.Window.Dispatcher.InvokeAsync(async () => | |
{ | |
var ctrl = Keyboard.FocusedElement; | |
if (ctrl != null) | |
{ | |
if (ctrl is TextBox tb) | |
{ | |
int caretIndex = tb.SelectionStart; | |
tb.Text = tb.Text.Insert(caretIndex, text); | |
tb.SelectionStart = tb.SelectionStart + text.Length; | |
tb.SelectionLength = 0; | |
return; | |
} | |
return; | |
} | |
var editor = mmApp.Model?.ActiveEditor; | |
if (editor == null) | |
return; | |
if (text.Trim().Equals("stop recording", StringComparison.OrdinalIgnoreCase)) | |
{ | |
Stop(); | |
return; | |
} | |
text = await FixUpDictatedText(text, editor); | |
await editor?.SetSelectionAndFocus(text); | |
}); | |
} | |
/// <summary> | |
/// Captures timeouts or other operations where dictation is completed but for | |
/// some reason dictation was not stopped. | |
/// </summary> | |
/// <param name="sender"></param> | |
/// <param name="args"></param> | |
private void ContinuousRecognitionSession_Completed(SpeechContinuousRecognitionSession sender, SpeechContinuousRecognitionCompletedEventArgs args) | |
{ | |
if (IsDictating) | |
{ | |
Stop(); | |
} | |
} | |
private async Task<string> FixUpDictatedText(string text, MarkdownDocumentEditor editor) | |
{ | |
var lineText = await editor.GetCurrentLine() ?? string.Empty; | |
var selPoint = await editor.GetSelectionRange(); | |
lineText = lineText.Substring(0, selPoint.StartColumn); | |
string trimLine = lineText.TrimEnd(); | |
if (text.Length > 1 && (text.EndsWith('.') || text.EndsWith('?') || text.EndsWith('!') || text.EndsWith(','))) | |
{ | |
text += " "; | |
} | |
if (text.Trim().Equals("space", StringComparison.OrdinalIgnoreCase) && !lineText.EndsWith(' ')) | |
text = " "; | |
if (text.Trim().Equals("line break", StringComparison.OrdinalIgnoreCase) || | |
text.Trim().Equals("return", StringComparison.OrdinalIgnoreCase)) | |
text = "\n"; | |
// Capitalize the first character of the text if the last character of the current line is a sentence terminator | |
var firstChar = text[0]; | |
if (string.IsNullOrWhiteSpace(trimLine)) | |
{ | |
/* Beginning of the line: do nothing. */ | |
} | |
else if (text == "." || text == "?" || text == "!") | |
{ | |
return text; | |
} | |
else if (trimLine.EndsWith('.') || trimLine.EndsWith('!') || trimLine.EndsWith('?')) | |
{ | |
text = firstChar.ToString().ToUpper() + text.Substring(1); | |
} | |
else | |
{ | |
text = firstChar.ToString().ToLower() + text.Substring(1); | |
} | |
// Add a space if the last character of the current line is not a space | |
if (!string.IsNullOrWhiteSpace(lineText) && !lineText.EndsWith(' ')) | |
{ | |
text = " " + text; | |
} | |
return text; | |
} | |
/// <summary> | |
/// Ensures that the speech recognizer's constraints are compiled before starting recognition. | |
/// This is necessary to prepare the recognizer for accurate speech recognition. | |
/// </summary> | |
private async Task EnsureCompiledAsync() | |
{ | |
if (!_isCompiled) | |
{ | |
var action = _recognizer.CompileConstraintsAsync(); | |
//await Task.Delay(100); | |
await AsTask<SpeechRecognitionCompilationResult>(action); | |
_isCompiled = true; | |
} | |
} | |
#endregion | |
MethodInfo _asTaskMethod = null; | |
MethodInfo _asTaskMethodGeneric = null; | |
Type _WindowsRuntimeSystemExtensionsType = null; | |
Task AsTask(object action) | |
{ | |
if (_asTaskMethod == null) | |
{ | |
try | |
{ | |
if (_WindowsRuntimeSystemExtensionsType == null) | |
{ | |
var assemblies = AppDomain.CurrentDomain.GetAssemblies(); | |
var assembly = assemblies | |
.FirstOrDefault(a => a.GetName()?.Name == "Microsoft.Windows.SDK.NET"); | |
var types = assembly.GetTypes(); | |
_WindowsRuntimeSystemExtensionsType = types.FirstOrDefault(t => t.FullName == "System.WindowsRuntimeSystemExtensions"); | |
} | |
_asTaskMethod = _WindowsRuntimeSystemExtensionsType.GetMethod("AsTask", [typeof(IAsyncAction)]); | |
} | |
catch { } | |
} | |
if (_asTaskMethod == null) | |
return Task.Delay(20); | |
var t = _asTaskMethod.Invoke(null, [action]) as Task; | |
if (t == null) | |
return Task.Delay(20); | |
return t; | |
} | |
//await AsTask<SpeechRecognitionCompilationResult>(action); | |
Task AsTask<T>(object action) | |
{ | |
if (_asTaskMethod == null) | |
{ | |
try | |
{ | |
if (_WindowsRuntimeSystemExtensionsType == null) | |
{ | |
var assemblies = AppDomain.CurrentDomain.GetAssemblies(); | |
var assembly = assemblies | |
.FirstOrDefault(a => a.GetName()?.Name == "Microsoft.Windows.SDK.NET"); | |
var types = assembly.GetTypes(); | |
_WindowsRuntimeSystemExtensionsType = types.FirstOrDefault(t => t.FullName == "System.WindowsRuntimeSystemExtensions"); | |
} | |
var method = _WindowsRuntimeSystemExtensionsType.GetMethods() | |
.FirstOrDefault(m => m.Name == "AsTask" && m.IsGenericMethod && m.GetParameters().Length == 1); | |
_asTaskMethodGeneric = method.MakeGenericMethod(typeof(T)); | |
} | |
catch { } | |
} | |
if (_asTaskMethodGeneric == null) | |
return Task.Delay(100); | |
var t = _asTaskMethodGeneric.Invoke(null, [action]) as Task; | |
if (t == null) | |
return Task.Delay(100); | |
return t; | |
} | |
/// <summary> | |
/// Disposes the resources used by the listener. | |
/// Unhooks event handlers and disposes the speech recognizer. | |
/// </summary> | |
public void Dispose() | |
{ | |
if (!_isDisposed) | |
{ | |
Keyboard.RemoveKeyDownHandler(mmApp.Window, KeydownHandler); | |
_recognizer.ContinuousRecognitionSession.ResultGenerated -= ContinuousRecognitionSession_ResultGenerated; | |
_recognizer?.Dispose(); | |
_isDisposed = true; | |
} | |
} | |
} | |
public enum DictationListenModes | |
{ | |
EscPressed, | |
OpenMic | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment