123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374 |
- /*
- @file
- @brief a simple demo to recognize speech from microphone
- @author taozhang9
- @date 2016/05/27
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <unistd.h>
- #include <string.h>
- #include "speech_recognizer.h"
- #include "../include/qisr.h"
- #include "../include/msp_cmn.h"
- #include "../include/msp_errors.h"
- #include "linuxrec.h"
/*
 * Debug logging switch. With SR_DBGON set to 1, sr_dbg() is an alias for
 * printf(); with any other value every sr_dbg(...) call compiles away.
 */
#define SR_DBGON 1
#if SR_DBGON == 1
#	define sr_dbg printf
#else
	/*
	 * Swallow the whole call, arguments included. An empty object-like
	 * macro would leave a stray "(fmt, args);" comma expression behind.
	 */
#	define sr_dbg(...) ((void)0)
#endif
/* Session parameters used when sr_init_ex() is given a NULL parameter string. */
#define DEFAULT_SESSION_PARA \
	"sub = iat, domain = iat, language = zh_cn, accent = mandarin, sample_rate = 16000, result_type = plain, result_encoding = UTF-8"

/*
 * Positional initializer for the WAVEFORMATEX handed to the recorder.
 * NOTE(review): the field order is presumably the usual WAVEFORMATEX layout
 * (format tag, channels, samples/sec, avg bytes/sec, block align,
 * bits/sample, cbSize) -- confirm against the struct declaration.
 */
#define DEFAULT_FORMAT			\
{					\
	WAVE_FORMAT_PCM,		\
	1,				\
	16000,				\
	32000,				\
	2,				\
	16,				\
	sizeof(WAVEFORMATEX)		\
}
/*
 * Internal recognizer state kept in speech_rec.state.
 * The values are ordered: the code compares them with < and >=.
 */
enum {
	SR_STATE_INIT = 0,	/* initialized, or a session has ended */
	SR_STATE_STARTED = 1	/* set by sr_start_listening() on success */
};
/* Memory primitives used by this module; they map straight to the C library. */
#define SR_MALLOC	malloc
#define SR_MFREE	free
#define SR_MEMSET	memset
/* Block the calling thread for `ms` milliseconds (thin wrapper over usleep). */
static void Sleep(size_t ms)
{
	const size_t usec_per_ms = 1000;

	usleep(ms * usec_per_ms);
}
- static void end_sr_on_error(struct speech_rec *sr, int errcode)
- {
- if(sr->aud_src == SR_MIC)
- stop_record(sr->recorder);
- if (sr->session_id) {
- if (sr->notif.on_speech_end)
- sr->notif.on_speech_end(errcode);
- QISRSessionEnd(sr->session_id, "err");
- sr->session_id = NULL;
- }
- sr->state = SR_STATE_INIT;
- }
- static void end_sr_on_vad(struct speech_rec *sr)
- {
- int errcode;
- const char *rslt;
- if (sr->aud_src == SR_MIC)
- stop_record(sr->recorder);
- sr->rec_stat = MSP_AUDIO_SAMPLE_CONTINUE;
- while(sr->rec_stat != MSP_REC_STATUS_COMPLETE ){
- rslt = QISRGetResult(sr->session_id, &sr->rec_stat, 0, &errcode);
- if (rslt && sr->notif.on_result)
- sr->notif.on_result(rslt, sr->rec_stat == MSP_REC_STATUS_COMPLETE ? 1 : 0);
- Sleep(100); /* for cpu occupy, should sleep here */
- }
- if (sr->session_id) {
- if (sr->notif.on_speech_end)
- sr->notif.on_speech_end(END_REASON_VAD_DETECT);
- QISRSessionEnd(sr->session_id, "VAD Normal");
- sr->session_id = NULL;
- }
- sr->state = SR_STATE_INIT;
- }
- /* the record call back */
- static void iat_cb(char *data, unsigned long len, void *user_para)
- {
- int errcode;
- struct speech_rec *sr;
- if(len == 0 || data == NULL)
- return;
- sr = (struct speech_rec *)user_para;
- if(sr == NULL || sr->ep_stat >= MSP_EP_AFTER_SPEECH)
- return;
- if (sr->state < SR_STATE_STARTED)
- return; /* ignore the data if error/vad happened */
- errcode = sr_write_audio_data(sr, data, len);
- if (errcode) {
- end_sr_on_error(sr, errcode);
- return;
- }
- }
- static char * skip_space(char *s)
- {
- while (s && *s != ' ' && *s != '\0')
- s++;
- return s;
- }
- static int update_format_from_sessionparam(const char * session_para, WAVEFORMATEX *wavefmt)
- {
- char *s;
- if ((s = strstr(session_para, "sample_rate"))) {
- s = strstr(s, "=");
- if (s && *s) {
- s = skip_space(s);
- if (s && *s) {
- wavefmt->nSamplesPerSec = atoi(s);
- wavefmt->nAvgBytesPerSec = wavefmt->nBlockAlign * wavefmt->nSamplesPerSec;
- }
- }
- else
- return -1;
- }
- else {
- return -1;
- }
- return 0;
- }
- /* devid will be ignored if aud_src is not SR_MIC ; use get_default_dev_id
- * to use the default input device. Currently the device list function is
- * not provided yet.
- */
- int sr_init_ex(struct speech_rec * sr, const char * session_begin_params,
- enum sr_audsrc aud_src, record_dev_id devid,
- struct speech_rec_notifier * notify)
- {
- int errcode;
- size_t param_size;
- WAVEFORMATEX wavfmt = DEFAULT_FORMAT;
- if (aud_src == SR_MIC && get_input_dev_num() == 0) {
- return -E_SR_NOACTIVEDEVICE;
- }
- if (!sr)
- return -E_SR_INVAL;
- if (session_begin_params == NULL) {
- session_begin_params = DEFAULT_SESSION_PARA;
- }
- SR_MEMSET(sr, 0, sizeof(struct speech_rec));
- sr->state = SR_STATE_INIT;
- sr->aud_src = aud_src;
- sr->ep_stat = MSP_EP_LOOKING_FOR_SPEECH;
- sr->rec_stat = MSP_REC_STATUS_SUCCESS;
- sr->audio_status = MSP_AUDIO_SAMPLE_FIRST;
- param_size = strlen(session_begin_params) + 1;
- sr->session_begin_params = (char*)SR_MALLOC(param_size);
- if (sr->session_begin_params == NULL) {
- sr_dbg("mem alloc failed\n");
- return -E_SR_NOMEM;
- }
- strncpy(sr->session_begin_params, session_begin_params, param_size);
- sr->notif = *notify;
- if (aud_src == SR_MIC) {
- errcode = create_recorder(&sr->recorder, iat_cb, (void*)sr);
- if (sr->recorder == NULL || errcode != 0) {
- sr_dbg("create recorder failed: %d\n", errcode);
- errcode = -E_SR_RECORDFAIL;
- goto fail;
- }
- update_format_from_sessionparam(session_begin_params, &wavfmt);
- errcode = open_recorder(sr->recorder, devid, &wavfmt);
- if (errcode != 0) {
- sr_dbg("recorder open failed: %d\n", errcode);
- errcode = -E_SR_RECORDFAIL;
- goto fail;
- }
- }
- return 0;
- fail:
- if (sr->recorder) {
- destroy_recorder(sr->recorder);
- sr->recorder = NULL;
- }
- if (sr->session_begin_params) {
- SR_MFREE(sr->session_begin_params);
- sr->session_begin_params = NULL;
- }
- SR_MEMSET(&sr->notif, 0, sizeof(sr->notif));
- return errcode;
- }
- /* use the default input device to capture the audio. see sr_init_ex */
- int sr_init(struct speech_rec * sr, const char * session_begin_params,
- enum sr_audsrc aud_src, struct speech_rec_notifier * notify)
- {
- return sr_init_ex(sr, session_begin_params, aud_src,
- get_default_input_dev(), notify);
- }
- int sr_start_listening(struct speech_rec *sr)
- {
- int ret;
- const char* session_id = NULL;
- int errcode = MSP_SUCCESS;
- if (sr->state >= SR_STATE_STARTED) {
- sr_dbg("already STARTED.\n");
- return -E_SR_ALREADY;
- }
- session_id = QISRSessionBegin(NULL, sr->session_begin_params, &errcode); //听写不需要语法,第一个参数为NULL
- if (MSP_SUCCESS != errcode)
- {
- sr_dbg("\nQISRSessionBegin failed! error code:%d\n", errcode);
- return errcode;
- }
- sr->session_id = session_id;
- sr->ep_stat = MSP_EP_LOOKING_FOR_SPEECH;
- sr->rec_stat = MSP_REC_STATUS_SUCCESS;
- sr->audio_status = MSP_AUDIO_SAMPLE_FIRST;
- if (sr->aud_src == SR_MIC) {
- ret = start_record(sr->recorder);
- if (ret != 0) {
- sr_dbg("start record failed: %d\n", ret);
- QISRSessionEnd(session_id, "start record fail");
- sr->session_id = NULL;
- return -E_SR_RECORDFAIL;
- }
- }
- sr->state = SR_STATE_STARTED;
- if (sr->notif.on_speech_begin)
- sr->notif.on_speech_begin();
- return 0;
- }
/*
 * After stop_record() there may still be some data callbacks in flight, so
 * poll in 1 ms steps until the recorder reports that it has stopped.
 *
 * A timeout_ms of (unsigned int)-1 waits without limit; otherwise polling
 * gives up once the countdown has reached zero.
 */
static void wait_for_rec_stop(struct recorder *rec, unsigned int timeout_ms)
{
	const int wait_forever = (timeout_ms == (unsigned int)-1);

	for (;;) {
		if (is_record_stopped(rec))
			return;

		Sleep(1);

		if (wait_forever)
			continue;
		if (timeout_ms == 0)
			return;
		timeout_ms--;
	}
}
- int sr_stop_listening(struct speech_rec *sr)
- {
- int ret = 0;
- const char * rslt = NULL;
- if (sr->state < SR_STATE_STARTED) {
- sr_dbg("Not started or already stopped.\n");
- return 0;
- }
- if (sr->aud_src == SR_MIC) {
- ret = stop_record(sr->recorder);
- if (ret != 0) {
- sr_dbg("Stop failed! \n");
- return -E_SR_RECORDFAIL;
- }
- wait_for_rec_stop(sr->recorder, (unsigned int)-1);
- }
- sr->state = SR_STATE_INIT;
- ret = QISRAudioWrite(sr->session_id, NULL, 0, MSP_AUDIO_SAMPLE_LAST, &sr->ep_stat, &sr->rec_stat);
- if (ret != 0) {
- sr_dbg("write LAST_SAMPLE failed: %d\n", ret);
- QISRSessionEnd(sr->session_id, "write err");
- return ret;
- }
- sr->rec_stat = 2;
- while (sr->rec_stat != MSP_REC_STATUS_COMPLETE) {
- rslt = QISRGetResult(sr->session_id, &sr->rec_stat, 0, &ret);
- if (MSP_SUCCESS != ret) {
- sr_dbg("\nQISRGetResult failed! error code: %d\n", ret);
- end_sr_on_error(sr, ret);
- return ret;
- }
- if (NULL != rslt && sr->notif.on_result)
- sr->notif.on_result(rslt, sr->rec_stat == MSP_REC_STATUS_COMPLETE ? 1 : 0);
- Sleep(100);
- }
- QISRSessionEnd(sr->session_id, "normal");
- sr->session_id = NULL;
- return 0;
- }
- int sr_write_audio_data(struct speech_rec *sr, char *data, unsigned int len)
- {
- const char *rslt = NULL;
- int ret = 0;
- if (!sr )
- return -E_SR_INVAL;
- if (!data || !len)
- return 0;
- ret = QISRAudioWrite(sr->session_id, data, len, sr->audio_status, &sr->ep_stat, &sr->rec_stat);
- if (ret) {
- end_sr_on_error(sr, ret);
- return ret;
- }
- sr->audio_status = MSP_AUDIO_SAMPLE_CONTINUE;
- if (MSP_REC_STATUS_SUCCESS == sr->rec_stat) { //已经有部分听写结果
- rslt = QISRGetResult(sr->session_id, &sr->rec_stat, 0, &ret);
- if (MSP_SUCCESS != ret) {
- sr_dbg("\nQISRGetResult failed! error code: %d\n", ret);
- end_sr_on_error(sr, ret);
- return ret;
- }
- if (NULL != rslt && sr->notif.on_result)
- sr->notif.on_result(rslt, sr->rec_stat == MSP_REC_STATUS_COMPLETE ? 1 : 0);
- }
- if (MSP_EP_AFTER_SPEECH == sr->ep_stat)
- end_sr_on_vad(sr);
- return 0;
- }
- void sr_uninit(struct speech_rec * sr)
- {
- if (sr->recorder) {
- if(!is_record_stopped(sr->recorder))
- stop_record(sr->recorder);
- close_recorder(sr->recorder);
- destroy_recorder(sr->recorder);
- sr->recorder = NULL;
- }
- if (sr->session_begin_params) {
- SR_MFREE(sr->session_begin_params);
- sr->session_begin_params = NULL;
- }
- }
|