Created
August 18, 2013 20:29
-
-
Save Uberi/6263822 to your computer and use it in GitHub Desktop.
Speech recognition with Microsoft's SAPI. A simple SpeechRecognizer class provides a quick and easy way to use speech recognition in your scripts. Inspired by some [prototype code](http://www.autohotkey.com/board/topic/24490-voice-recognition-com/) made a long time ago.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#NoEnv ;do not fall back to environment variables when a script variable is empty
#Warn All ;enable every category of warning
#Warn LocalSameAsGlobal, Off ;but do not warn when a local variable has the same name as a global
#Persistent ;keep the script running after the auto-execute section finishes
/* | |
Speech Recognition | |
================== | |
A class providing access to Microsoft's SAPI. Requires the SAPI SDK. | |
Reference | |
--------- | |
### Recognizer := new SpeechRecognizer | |
Creates a new speech recognizer instance. | |
The instance starts off listening to any phrases. | |
### Recognizer.Recognize(Values = True) | |
Set the values that can be recognized by the recognizer. | |
If `Values` is an array of strings, the array is interpreted as a list of possible phrases to recognize. Phrases not in the array will not be recognized. This provides a relatively high degree of recognition accuracy compared to dictation mode.
If `Values` is otherwise truthy, dictation mode is enabled, which means that the speech recognizer will attempt to recognize any phrases spoken. | |
If `Values` is falsy, the speech recognizer will be disabled and will stop listening if currently doing so. | |
Returns the speech recognizer instance. | |
### Recognizer.Listen(State = True) | |
Set the state of the recognizer. | |
If `State` is truthy, then the recognizer will start listening if not already doing so. | |
If `State` is falsy, then the recognizer will stop listening if currently doing so. | |
Returns the speech recognizer instance. | |
### Text := Recognizer.Prompt(Timeout = -1) | |
Obtains the next phrase spoken as plain text. | |
If `Timeout` is a positive number, the function will stop and return a blank string after this amount of time, if the user has not said anything in this interval. | |
If `Timeout` is a negative number, the function will wait indefinitely for the user to speak a phrase. | |
Returns the text spoken. | |
### Recognizer.OnRecognize(Text) | |
A callback invoked immediately upon any phrases being recognized. | |
The `Text` parameter receives the phrase spoken.
This function is meant to be overridden in subclasses. By default, it does nothing. | |
The return value is discarded. | |
*/ | |
/* Example: recognizing a specific list of phrases | |
TrayTip, Speech Recognition, Say a number between 0 and 9 inclusive | |
s := new SpeechRecognizer | |
s.Recognize(["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]) | |
Text := s.Prompt() | |
TrayTip, Speech Recognition, You said: %Text% | |
Sleep, 3000 | |
ExitApp | |
*/ | |
/* Example: recognizing any phrase | |
TrayTip, Speech Recognition, Say something | |
s := new SpeechRecognizer | |
s.Recognize(True) | |
Text := s.Prompt() | |
TrayTip, Speech Recognition, You said: %Text% | |
Sleep, 3000 | |
ExitApp | |
*/ | |
/* Example: custom behaviour upon phrase recognition | |
TrayTip, Speech Recognition, Say something (press Escape to close) | |
s := new CustomSpeech ;create the custom speech recognizer | |
s.Recognize(True) | |
Esc::ExitApp | |
class CustomSpeech extends SpeechRecognizer | |
{ | |
OnRecognize(Text) | |
{ | |
static cSpeaker := ComObjCreate("SAPI.SpVoice") | |
TrayTip, Speech Recognition, You said: %Text% | |
cSpeaker.Speak("You said: " . Text) | |
} | |
} | |
*/ | |
class SpeechRecognizer
{ ;speech recognition class by Uberi
    ;maps the address of each recognition context wrapper to the address of the instance owning it
    ;(addresses rather than references are stored, so the map does not keep instances alive)
    static Contexts := {}

    ;Creates the SAPI recognizer, recognition context, grammar and the dynamic "WordsRule" rule,
    ;then switches to dictation mode and connects the context's events to the SpeechRecognizer_* functions.
    ;Throws an exception naming the failed step if any of the SAPI calls fail.
    __New()
    {
        try
        {
            this.cListener := ComObjCreate("SAPI.SpInprocRecognizer") ;obtain speech recognizer (ISpeechRecognizer object)
            cAudioInputs := this.cListener.GetAudioInputs() ;obtain list of audio inputs (ISpeechObjectTokens object)
            this.cListener.AudioInput := cAudioInputs.Item(0) ;set audio device to first input
        }
        catch e
            throw Exception("Could not create recognizer: " . e.Message)

        try this.cContext := this.cListener.CreateRecoContext() ;obtain speech recognition context (ISpeechRecoContext object)
        catch e
            throw Exception("Could not create recognition context: " . e.Message)

        try this.cGrammar := this.cContext.CreateGrammar() ;obtain phrase manager (ISpeechRecoGrammar object)
        catch e
            throw Exception("Could not create recognition grammar: " . e.Message)

        ;create rule to use when dictation mode is off
        try
        {
            this.cRules := this.cGrammar.Rules() ;obtain list of grammar rules (ISpeechGrammarRules object)
            this.cRule := this.cRules.Add("WordsRule",0x1 | 0x20) ;add a new grammar rule (SRATopLevel | SRADynamic)
        }
        catch e
            throw Exception("Could not create speech recognition grammar rules: " . e.Message)

        ;give the rule some initial content, then start off in dictation mode
        ;(Phrases() turns dictation off, so Dictate(True) must come after it)
        this.Phrases(["hello", "hi", "greetings", "salutations"])
        this.Dictate(True)

        SpeechRecognizer.Contexts[&this.cContext] := &this ;store a weak reference to the instance so event callbacks can obtain this instance
        this.Prompting := False ;prompting defaults to inactive
        ComObjConnect(this.cContext, "SpeechRecognizer_") ;connect the recognition context events to functions
    }

    ;Selects what may be recognized.
    ;  Values: an array of phrases -> only those phrases are recognized
    ;          any other truthy value -> dictation mode (any phrase)
    ;          a falsy value -> the recognizer stops listening
    ;Returns the instance, so calls can be chained.
    Recognize(Values = True)
    {
        If Values ;enable speech recognition
        {
            this.Listen(True)
            If IsObject(Values) ;list of phrases to use
                this.Phrases(Values)
            Else ;recognize any phrase
                this.Dictate(True)
        }
        Else ;disable speech recognition
            this.Listen(False)
        Return, this
    }

    ;Starts (truthy State) or stops (falsy State) the recognizer.
    ;Throws an exception if the listener state cannot be set.
    ;Returns the instance.
    Listen(State = True)
    {
        try
        {
            If State
                this.cListener.State := 1 ;SRSActive
            Else
                this.cListener.State := 0 ;SRSInactive
        }
        catch e
            throw Exception("Could not set listener state: " . e.Message)
        Return, this
    }

    ;Waits for the next recognized phrase and returns it as text.
    ;  Timeout: a negative value waits indefinitely; otherwise the maximum time to wait, compared against A_TickCount (milliseconds).
    ;Returns the phrase stored by the recognition callback, or a blank string if the timeout elapsed first.
    Prompt(Timeout = -1)
    {
        this.Prompting := True
        this.SpokenText := ""
        StartTime := A_TickCount
        ;the recognition callback clears this.Prompting once it has stored the phrase in this.SpokenText;
        ;keep waiting while that has not happened and the timeout (if there is one) has not yet elapsed
        While, this.Prompting && (Timeout < 0 || (A_TickCount - StartTime) < Timeout)
            Sleep, 10 ;short sleep instead of spinning with Sleep 0, to avoid pegging a CPU core while waiting
        this.Prompting := False ;leave prompting mode even when the wait ended because of the timeout
        Return, this.SpokenText
    }

    ;Replaces the contents of the "WordsRule" rule with the given phrases and turns dictation mode off.
    ;  PhraseList: an array of phrase strings
    ;Throws an exception naming the failed step if any of the SAPI calls fail.
    ;Returns the instance.
    Phrases(PhraseList)
    {
        try this.cRule.Clear() ;reset rule to initial state
        catch e
            throw Exception("Could not reset rule: " . e.Message)

        try cState := this.cRule.InitialState() ;obtain rule initial state (ISpeechGrammarRuleState object)
        catch e
            throw Exception("Could not obtain rule initial state: " . e.Message)

        ;add rules to recognize
        cNull := ComObjParameter(13,0) ;null IUnknown pointer
        For Index, Phrase In PhraseList
        {
            try cState.AddWordTransition(cNull, Phrase) ;add a no-op rule state transition triggered by a phrase
            catch e
                throw Exception("Could not add rule """ . Phrase . """: " . e.Message)
        }

        try this.cRules.Commit() ;compile all rules in the rule collection
        catch e
            throw Exception("Could not update rule: " . e.Message)

        this.Dictate(False) ;disable dictation mode
        Return, this
    }

    ;Switches between dictation mode and the "WordsRule" phrase list; exactly one of the two is active afterwards.
    ;  Enable: truthy -> dictation on, rule off; falsy -> dictation off, rule on
    ;Throws an exception if the grammar state cannot be changed.
    ;Returns the instance.
    Dictate(Enable = True)
    {
        try
        {
            If Enable ;enable dictation mode
            {
                this.cGrammar.DictationSetState(1) ;enable dictation mode (SGDSActive)
                this.cGrammar.CmdSetRuleState("WordsRule", 0) ;disable the rule (SGDSInactive)
            }
            Else ;disable dictation mode
            {
                this.cGrammar.DictationSetState(0) ;disable dictation mode (SGDSInactive)
                this.cGrammar.CmdSetRuleState("WordsRule", 1) ;enable the rule (SGDSActive)
            }
        }
        catch e
            throw Exception("Could not set grammar dictation state: " . e.Message)
        Return, this
    }

    ;Called by SpeechRecognizer_Recognition with the recognized text each time a phrase is recognized.
    OnRecognize(Text)
    {
        ;placeholder function meant to be overridden in subclasses
    }

    ;Removes this instance's entry from the Contexts map when the instance is released.
    __Delete()
    {
        ;remove weak reference to the instance
        this.base.Contexts.Remove(&this.cContext, "")
    }
}
;Handler for the recognition context's Recognition event (wired up through ComObjConnect with the "SpeechRecognizer_" prefix).
;Extracts the recognized text from cResult, hands it to a pending Prompt() call if there is one,
;and then passes it to the owning instance's OnRecognize callback.
;Throws an exception if the text cannot be read from the result.
SpeechRecognizer_Recognition(StreamNumber, StreamPosition, RecognitionType, cResult, cContext)
{
    ;cResult is an ISpeechRecoResult object; its PhraseInfo (ISpeechPhraseInfo object) provides the spoken text
    try Text := cResult.PhraseInfo().GetText()
    catch e
        throw Exception("Could not obtain recognition result text: " . e.Message)

    ;look up the owning instance by the context wrapper's address and turn the stored address back into a reference
    Recognizer := Object(SpeechRecognizer.Contexts[&cContext])

    ;a Prompt() call is waiting: store the result for it and end prompting mode
    If Recognizer.Prompting
    {
        Recognizer.SpokenText := Text
        Recognizer.Prompting := False
    }

    ;notify the recognizer (subclasses override OnRecognize to react to phrases)
    Recognizer.OnRecognize(Text)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
How can I store the spoken response into a variable been trying for days without success, Hope someone can help.
TIA