C++应用SAPI实现语音合成和语音识别的方法和代码
C++使用SAPI实现语音合成和语音识别的方法和代码
语音合成:
微软的语音识别,在这里我们简称它为SR(speech recognition)。SR分为两种监听模式:第一种模式:任意监听,即随意输入语音,监听对象将最为接近的字、词或句子反馈出来;第二种模式:划定范围监听,即指定一组备选项作为监听范围,用户的语音输入被反馈成其中最为接近的一个选项。说得通俗一些:第一种是填空题,第二种是选择题。
下面是第一种模式的代码:
语音合成:
#include <sapi.h>

#pragma comment(lib, "ole32.lib") // CoInitialize / CoCreateInstance are exported by ole32.dll
#pragma comment(lib, "sapi.lib")  // sapi.lib lives in the SDK lib directory; the path must be configured correctly

// Text-to-speech sample: creates an ISpVoice object through COM and has it
// speak "Hello world" synchronously (Speak is called with flags == 0).
//
// Returns 0 when COM was initialized, the voice was created and Speak
// succeeded; returns 1 on any failure, following the usual process exit-code
// convention.
int main(int argc, char* argv[])
{
    // COM must be initialized before any SAPI object can be created.
    if (FAILED(::CoInitialize(NULL)))
        return 1;

    // Obtain the ISpVoice interface.
    ISpVoice * pVoice = NULL;
    HRESULT hr = CoCreateInstance(CLSID_SpVoice, NULL, CLSCTX_ALL, IID_ISpVoice,
                                  reinterpret_cast<void **>(&pVoice));
    if (SUCCEEDED(hr))
    {
        hr = pVoice->Speak(L"Hello world", 0, NULL);

        // Drop our reference before COM is torn down.
        pVoice->Release();
        pVoice = NULL;
    }

    // Never forget to balance the successful CoInitialize call.
    ::CoUninitialize();

    return SUCCEEDED(hr) ? 0 : 1;
}
微软的语音识别,在这里我们简称它为SR(speech recognition)。SR分为两种监听模式:第一种模式:任意监听,即随意输入语音,监听对象将最为接近的字、词或句子反馈出来;第二种模式:划定范围监听,即指定一组备选项作为监听范围,用户的语音输入被反馈成其中最为接近的一个选项。说得通俗一些:第一种是填空题,第二种是选择题。
下面是第一种模式的代码:
#include <windows.h>
#include <sapi.h>
#include <stdio.h>
#include <string.h>
#include <atlbase.h>
#include "sphelper.h"

// Blocks until an event can be pulled from the recognition context and hands
// back the recognition result attached to it.
//
// While CSpEvent::GetFrom succeeds with S_FALSE, the function waits on the
// context's notify event (INFINITE timeout) and tries again; the loop ends on
// the first failure or on the first GetFrom that returns something other than
// S_FALSE.
//
// pRecoCtxt - recognition context to read events from.
// ppResult  - receives event.RecoResult(); when non-null the pointer has been
//             AddRef'ed and the caller must release it.
//
// Returns the last HRESULT produced by GetFrom / WaitForNotifyEvent.
inline HRESULT BlockForResult(ISpRecoContext * pRecoCtxt, ISpRecoResult ** ppResult)
{
    HRESULT hr = S_OK;
    CSpEvent event;

    while (SUCCEEDED(hr) &&
           SUCCEEDED(hr = event.GetFrom(pRecoCtxt)) &&
           hr == S_FALSE)
    {
        hr = pRecoCtxt->WaitForNotifyEvent(INFINITE);
    }

    *ppResult = event.RecoResult();
    if (*ppResult)
    {
        // The event still owns its reference; take one for the caller.
        (*ppResult)->AddRef();
    }

    return hr;
}

// Returns the phrase that ends the dictation session, selected by the user's
// default UI language: a Japanese phrase for Japanese, L"Stop" otherwise.
// The returned pointer refers to a string literal and must not be freed.
const WCHAR * StopWord()
{
    const WCHAR * pchStop;

    LANGID LangId = ::SpGetUserDefaultUILanguage();

    switch (LangId)
    {
        case MAKELANGID(LANG_JAPANESE, SUBLANG_DEFAULT):
            // Written with \x escapes so the source file stays pure ASCII.
            // Without the escapes the literal would contain an embedded L'\0'
            // and be cut short.
            pchStop = L"\x7d42\x4e86\\\x30b7\x30e5\x30fc\x30ea\x30e7\x30fc/\x3057\x3085\x3046\x308a\x3087\x3046";
            break;

        default:
            pchStop = L"Stop";
            break;
    }

    return pchStop;
}

// Free-dictation sample: listens on the shared recognizer, prints every
// recognized phrase, optionally speaks it back (TTS) and optionally replays
// the retained audio, until the stop word is recognized.
//
// Command line: [-noTTS] [-noReplay]; any other argument prints the usage
// text and returns E_FAIL.
//
// Returns the last HRESULT of the session as the process exit code.
int main(int argc, char* argv[])
{
    HRESULT hr = E_FAIL;
    bool fUseTTS = true;    // turn TTS play back on or off
    bool fReplay = true;    // turn Audio replay on or off

    // Process optional arguments
    for (int i = 1; i < argc; i++)
    {
        if (_stricmp(argv[i], "-noTTS") == 0)
        {
            fUseTTS = false;
            continue;
        }
        if (_stricmp(argv[i], "-noReplay") == 0)
        {
            fReplay = false;
            continue;
        }
        printf("Usage: %s [-noTTS] [-noReplay]\n", argv[0]);
        return hr;
    }

    if (SUCCEEDED(hr = ::CoInitialize(NULL)))
    {
        {
            // Inner scope: every COM smart pointer must be released before
            // CoUninitialize runs.
            CComPtr<ISpRecoContext> cpRecoCtxt;
            CComPtr<ISpRecoGrammar> cpGrammar;
            CComPtr<ISpVoice>       cpVoice;

            hr = cpRecoCtxt.CoCreateInstance(CLSID_SpSharedRecoContext);
            if (SUCCEEDED(hr))
            {
                hr = cpRecoCtxt->GetVoice(&cpVoice);
            }

            if (cpRecoCtxt && cpVoice &&
                SUCCEEDED(hr = cpRecoCtxt->SetNotifyWin32Event()) &&
                SUCCEEDED(hr = cpRecoCtxt->SetInterest(SPFEI(SPEI_RECOGNITION), SPFEI(SPEI_RECOGNITION))) &&
                SUCCEEDED(hr = cpRecoCtxt->SetAudioOptions(SPAO_RETAIN_AUDIO, NULL, NULL)) &&
                SUCCEEDED(hr = cpRecoCtxt->CreateGrammar(0, &cpGrammar)) &&
                SUCCEEDED(hr = cpGrammar->LoadDictation(NULL, SPLO_STATIC)) &&
                SUCCEEDED(hr = cpGrammar->SetDictationState(SPRS_ACTIVE)))
            {
                const WCHAR * const pchStop = StopWord();
                CComPtr<ISpRecoResult> cpResult;

                {
                    // CW2A keeps its buffer inside the named object, unlike
                    // W2A which allocates with alloca on every use.
                    const CW2A stopAnsi(pchStop);
                    printf("I will repeat everything you say. Say \"%s\" to exit.\n",
                           static_cast<LPSTR>(stopAnsi));
                }

                while (SUCCEEDED(hr = BlockForResult(cpRecoCtxt, &cpResult)))
                {
                    // Stop listening while this phrase is echoed back.
                    cpGrammar->SetDictationState(SPRS_INACTIVE);

                    bool fStop = false;
                    CSpDynamicString dstrText;

                    if (cpResult &&
                        SUCCEEDED(cpResult->GetText(SP_GETWHOLEPHRASE, SP_GETWHOLEPHRASE,
                                                    TRUE, &dstrText, NULL)) &&
                        dstrText.m_psz != NULL)
                    {
                        {
                            // Scoped so the conversion buffer is freed each
                            // iteration instead of piling up on the stack.
                            const CW2A heardAnsi(dstrText);
                            printf("I heard: %s\n", static_cast<LPSTR>(heardAnsi));
                        }

                        if (fUseTTS)
                        {
                            cpVoice->Speak(L"I heard", SPF_ASYNC, NULL);
                            cpVoice->Speak(dstrText, SPF_ASYNC, NULL);
                        }

                        if (fReplay)
                        {
                            if (fUseTTS)
                                cpVoice->Speak(L"when you said", SPF_ASYNC, NULL);
                            else
                                printf(" when you said\n");
                            cpResult->SpeakAudio(NULL, 0, NULL, NULL);
                        }

                        // Only compare when there really is text; a null
                        // string must never reach _wcsicmp.
                        fStop = (_wcsicmp(dstrText, pchStop) == 0);
                    }

                    // Always drop the result: BlockForResult writes through
                    // &cpResult, which requires the smart pointer to be empty.
                    cpResult.Release();

                    if (fStop)
                    {
                        break;
                    }

                    cpGrammar->SetDictationState(SPRS_ACTIVE);
                }
            }
        }
        ::CoUninitialize();
    }
    return hr;
}