dspinellis · October 8, 2019 10:30
diff --git a/sapi-speech-to-text.cpp b/sapi-speech-to-text.cpp
 /*
 * Convert the specified speech WAV file into text output
 * on the program's standard output.
 *
 * Diomidis Spinellis, October 2019
 * Based on https://stackoverflow.com/a/40002268/20520
 */

 #include <iostream>
 #include <sapi.h>
 #include <sphelper.h>

 int main(int argc, char* argv[])
 {
 	if (argc != 2) {
 		std::cerr << "Usage: " << argv[0] << " file.wav\n";
 		return 1;
 	}

 	::CoInitialize(NULL);

 	HRESULT hr = S_OK;
 	CComPtr<ISpStream> cpInputStream;
 	CComPtr<ISpRecognizer> cpRecognizer;
 	CComPtr<ISpRecoContext> cpRecoContext;
 	CComPtr<ISpRecoGrammar> cpRecoGrammar;
 	hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
 	hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
 	std::string sInputFileName(argv[1]);
 	std::wstring wInputFileName(sInputFileName.begin(), sInputFileName.end());
 	hr = cpInputStream->BindToFile(wInputFileName.c_str(), SPFM_OPEN_READONLY, NULL, NULL, SPFEI_ALL_EVENTS);
 	if (FAILED(hr)) {
 		std::cerr << "Unable to open " << argv[1] << '\n';
 		return 1;
 	}
 	hr = cpRecognizer->SetInput(cpInputStream, TRUE);
 	hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
 	hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar);
 	hr = cpRecoGrammar->LoadDictation(NULL, SPLO_STATIC);

 	hr = cpRecoContext->SetNotifyWin32Event();
 	hr = cpRecoContext->SetInterest(SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM), SPFEI(SPEI_RECOGNITION) | SPFEI(SPEI_END_SR_STREAM));
 	hr = cpRecoGrammar->SetDictationState(SPRS_ACTIVE);
 	BOOL fEndStreamReached = FALSE;

 	while (!fEndStreamReached && cpRecoContext->WaitForNotifyEvent(INFINITE) == S_OK) {
 		CSpEvent spEvent;
 		ISpRecoResult *pPhrase;
 		SPPHRASE *phrase;

 		 while (!fEndStreamReached && spEvent.GetFrom(cpRecoContext) == S_OK) {

 			switch (spEvent.eEventId) {
 			case SPEI_RECOGNITION:
 				pPhrase = spEvent.RecoResult();
 				phrase = NULL;
 				pPhrase->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, NULL, NULL);
 				pPhrase->GetPhrase(&phrase);

 				if (phrase == NULL || phrase->pElements == NULL)
 					break;

 				for (int i = 0; i < phrase->Rule.ulCountOfElements; i++)
 					if (phrase->pElements[i].pszDisplayText != NULL)
 						std::wcout << phrase->pElements[i].pszDisplayText << ' ';
 				break;
 			case SPEI_END_SR_STREAM:
 				fEndStreamReached = TRUE;
 				break;
 			}
 			spEvent.Clear();
 		}
 	}
 	hr = cpRecoGrammar->SetDictationState(SPRS_INACTIVE);
 	hr = cpRecoGrammar->UnloadDictation();
 	hr = cpInputStream->Close();


 	::CoUninitialize();

 	std::wcout << '\n';
 	return 0;
 }
	/*
	* Convert the specified speech WAV file into text output
	* on the program's standard output.
	*
	* Diomidis Spinellis, October 2019
	* Based on https://stackoverflow.com/a/40002268/20520
	*/

	#include <iostream>
	#include <sapi.h>
	#include <sphelper.h>

	int main(int argc, char* argv[])
	{
	if (argc != 2) {
	std::cerr << "Usage: " << argv[0] << " file.wav\n";
	return 1;
	}

	::CoInitialize(NULL);

	HRESULT hr = S_OK;
	CComPtr<ISpStream> cpInputStream;
	CComPtr<ISpRecognizer> cpRecognizer;
	CComPtr<ISpRecoContext> cpRecoContext;
	CComPtr<ISpRecoGrammar> cpRecoGrammar;
	hr = cpRecognizer.CoCreateInstance(CLSID_SpInprocRecognizer);
	hr = cpInputStream.CoCreateInstance(CLSID_SpStream);
	std::string sInputFileName(argv[1]);
	std::wstring wInputFileName(sInputFileName.begin(), sInputFileName.end());
	hr = cpInputStream->BindToFile(wInputFileName.c_str(), SPFM_OPEN_READONLY, NULL, NULL, SPFEI_ALL_EVENTS);
	if (FAILED(hr)) {
	std::cerr << "Unable to open " << argv[1] << '\n';
	return 1;
	}
	hr = cpRecognizer->SetInput(cpInputStream, TRUE);
	hr = cpRecognizer->CreateRecoContext(&cpRecoContext);
	hr = cpRecoContext->CreateGrammar(NULL, &cpRecoGrammar);
	hr = cpRecoGrammar->LoadDictation(NULL, SPLO_STATIC);

	hr = cpRecoContext->SetNotifyWin32Event();
	hr = cpRecoContext->SetInterest(SPFEI(SPEI_RECOGNITION) \| SPFEI(SPEI_END_SR_STREAM), SPFEI(SPEI_RECOGNITION) \| SPFEI(SPEI_END_SR_STREAM));
	hr = cpRecoGrammar->SetDictationState(SPRS_ACTIVE);
	BOOL fEndStreamReached = FALSE;

	while (!fEndStreamReached && cpRecoContext->WaitForNotifyEvent(INFINITE) == S_OK) {
	CSpEvent spEvent;
	ISpRecoResult *pPhrase;
	SPPHRASE *phrase;

	while (!fEndStreamReached && spEvent.GetFrom(cpRecoContext) == S_OK) {

	switch (spEvent.eEventId) {
	case SPEI_RECOGNITION:
	pPhrase = spEvent.RecoResult();
	phrase = NULL;
	pPhrase->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE, TRUE, NULL, NULL);
	pPhrase->GetPhrase(&phrase);

	if (phrase == NULL \|\| phrase->pElements == NULL)
	break;

	for (int i = 0; i < phrase->Rule.ulCountOfElements; i++)
	if (phrase->pElements[i].pszDisplayText != NULL)
	std::wcout << phrase->pElements[i].pszDisplayText << ' ';
	break;
	case SPEI_END_SR_STREAM:
	fEndStreamReached = TRUE;
	break;
	}
	spEvent.Clear();
	}
	}
	hr = cpRecoGrammar->SetDictationState(SPRS_INACTIVE);
	hr = cpRecoGrammar->UnloadDictation();
	hr = cpInputStream->Close();


	::CoUninitialize();

	std::wcout << '\n';
	return 0;
	}