speech_recognizer.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
  1. /*
  2. @file
  3. @brief a simple demo to recognize speech from microphone
  4. @author taozhang9
  5. @date 2016/05/27
  6. */
  7. #include <stdio.h>
  8. #include <stdlib.h>
  9. #include <unistd.h>
  10. #include <string.h>
  11. #include "speech_recognizer.h"
  12. #include "../include/qisr.h"
  13. #include "../include/msp_cmn.h"
  14. #include "../include/msp_errors.h"
  15. #include "linuxrec.h"
  16. #define SR_DBGON 1
  17. #if SR_DBGON == 1
  18. # define sr_dbg printf
  19. #else
  20. # define sr_dbg
  21. #endif
  22. #define DEFAULT_SESSION_PARA \
  23. "sub = iat, domain = iat, language = zh_cn, accent = mandarin, sample_rate = 16000, result_type = plain, result_encoding = UTF-8"
  24. #define DEFAULT_FORMAT \
  25. {\
  26. WAVE_FORMAT_PCM, \
  27. 1, \
  28. 16000, \
  29. 32000, \
  30. 2, \
  31. 16, \
  32. sizeof(WAVEFORMATEX) \
  33. }
  34. /* internal state */
  35. enum {
  36. SR_STATE_INIT,
  37. SR_STATE_STARTED
  38. };
  39. #define SR_MALLOC malloc
  40. #define SR_MFREE free
  41. #define SR_MEMSET memset
  42. static void Sleep(size_t ms)
  43. {
  44. usleep(ms*1000);
  45. }
  46. static void end_sr_on_error(struct speech_rec *sr, int errcode)
  47. {
  48. if(sr->aud_src == SR_MIC)
  49. stop_record(sr->recorder);
  50. if (sr->session_id) {
  51. if (sr->notif.on_speech_end)
  52. sr->notif.on_speech_end(errcode);
  53. QISRSessionEnd(sr->session_id, "err");
  54. sr->session_id = NULL;
  55. }
  56. sr->state = SR_STATE_INIT;
  57. }
  58. static void end_sr_on_vad(struct speech_rec *sr)
  59. {
  60. int errcode;
  61. const char *rslt;
  62. if (sr->aud_src == SR_MIC)
  63. stop_record(sr->recorder);
  64. sr->rec_stat = MSP_AUDIO_SAMPLE_CONTINUE;
  65. while(sr->rec_stat != MSP_REC_STATUS_COMPLETE ){
  66. rslt = QISRGetResult(sr->session_id, &sr->rec_stat, 0, &errcode);
  67. if (rslt && sr->notif.on_result)
  68. sr->notif.on_result(rslt, sr->rec_stat == MSP_REC_STATUS_COMPLETE ? 1 : 0);
  69. Sleep(100); /* for cpu occupy, should sleep here */
  70. }
  71. if (sr->session_id) {
  72. if (sr->notif.on_speech_end)
  73. sr->notif.on_speech_end(END_REASON_VAD_DETECT);
  74. QISRSessionEnd(sr->session_id, "VAD Normal");
  75. sr->session_id = NULL;
  76. }
  77. sr->state = SR_STATE_INIT;
  78. }
  79. /* the record call back */
  80. static void iat_cb(char *data, unsigned long len, void *user_para)
  81. {
  82. int errcode;
  83. struct speech_rec *sr;
  84. if(len == 0 || data == NULL)
  85. return;
  86. sr = (struct speech_rec *)user_para;
  87. if(sr == NULL || sr->ep_stat >= MSP_EP_AFTER_SPEECH)
  88. return;
  89. if (sr->state < SR_STATE_STARTED)
  90. return; /* ignore the data if error/vad happened */
  91. errcode = sr_write_audio_data(sr, data, len);
  92. if (errcode) {
  93. end_sr_on_error(sr, errcode);
  94. return;
  95. }
  96. }
  97. static char * skip_space(char *s)
  98. {
  99. while (s && *s != ' ' && *s != '\0')
  100. s++;
  101. return s;
  102. }
  103. static int update_format_from_sessionparam(const char * session_para, WAVEFORMATEX *wavefmt)
  104. {
  105. char *s;
  106. if ((s = strstr(session_para, "sample_rate"))) {
  107. s = strstr(s, "=");
  108. if (s && *s) {
  109. s = skip_space(s);
  110. if (s && *s) {
  111. wavefmt->nSamplesPerSec = atoi(s);
  112. wavefmt->nAvgBytesPerSec = wavefmt->nBlockAlign * wavefmt->nSamplesPerSec;
  113. }
  114. }
  115. else
  116. return -1;
  117. }
  118. else {
  119. return -1;
  120. }
  121. return 0;
  122. }
  123. /* devid will be ignored if aud_src is not SR_MIC ; use get_default_dev_id
  124. * to use the default input device. Currently the device list function is
  125. * not provided yet.
  126. */
  127. int sr_init_ex(struct speech_rec * sr, const char * session_begin_params,
  128. enum sr_audsrc aud_src, record_dev_id devid,
  129. struct speech_rec_notifier * notify)
  130. {
  131. int errcode;
  132. size_t param_size;
  133. WAVEFORMATEX wavfmt = DEFAULT_FORMAT;
  134. if (aud_src == SR_MIC && get_input_dev_num() == 0) {
  135. return -E_SR_NOACTIVEDEVICE;
  136. }
  137. if (!sr)
  138. return -E_SR_INVAL;
  139. if (session_begin_params == NULL) {
  140. session_begin_params = DEFAULT_SESSION_PARA;
  141. }
  142. SR_MEMSET(sr, 0, sizeof(struct speech_rec));
  143. sr->state = SR_STATE_INIT;
  144. sr->aud_src = aud_src;
  145. sr->ep_stat = MSP_EP_LOOKING_FOR_SPEECH;
  146. sr->rec_stat = MSP_REC_STATUS_SUCCESS;
  147. sr->audio_status = MSP_AUDIO_SAMPLE_FIRST;
  148. param_size = strlen(session_begin_params) + 1;
  149. sr->session_begin_params = (char*)SR_MALLOC(param_size);
  150. if (sr->session_begin_params == NULL) {
  151. sr_dbg("mem alloc failed\n");
  152. return -E_SR_NOMEM;
  153. }
  154. strncpy(sr->session_begin_params, session_begin_params, param_size);
  155. sr->notif = *notify;
  156. if (aud_src == SR_MIC) {
  157. errcode = create_recorder(&sr->recorder, iat_cb, (void*)sr);
  158. if (sr->recorder == NULL || errcode != 0) {
  159. sr_dbg("create recorder failed: %d\n", errcode);
  160. errcode = -E_SR_RECORDFAIL;
  161. goto fail;
  162. }
  163. update_format_from_sessionparam(session_begin_params, &wavfmt);
  164. errcode = open_recorder(sr->recorder, devid, &wavfmt);
  165. if (errcode != 0) {
  166. sr_dbg("recorder open failed: %d\n", errcode);
  167. errcode = -E_SR_RECORDFAIL;
  168. goto fail;
  169. }
  170. }
  171. return 0;
  172. fail:
  173. if (sr->recorder) {
  174. destroy_recorder(sr->recorder);
  175. sr->recorder = NULL;
  176. }
  177. if (sr->session_begin_params) {
  178. SR_MFREE(sr->session_begin_params);
  179. sr->session_begin_params = NULL;
  180. }
  181. SR_MEMSET(&sr->notif, 0, sizeof(sr->notif));
  182. return errcode;
  183. }
  184. /* use the default input device to capture the audio. see sr_init_ex */
  185. int sr_init(struct speech_rec * sr, const char * session_begin_params,
  186. enum sr_audsrc aud_src, struct speech_rec_notifier * notify)
  187. {
  188. return sr_init_ex(sr, session_begin_params, aud_src,
  189. get_default_input_dev(), notify);
  190. }
  191. int sr_start_listening(struct speech_rec *sr)
  192. {
  193. int ret;
  194. const char* session_id = NULL;
  195. int errcode = MSP_SUCCESS;
  196. if (sr->state >= SR_STATE_STARTED) {
  197. sr_dbg("already STARTED.\n");
  198. return -E_SR_ALREADY;
  199. }
  200. session_id = QISRSessionBegin(NULL, sr->session_begin_params, &errcode); //听写不需要语法,第一个参数为NULL
  201. if (MSP_SUCCESS != errcode)
  202. {
  203. sr_dbg("\nQISRSessionBegin failed! error code:%d\n", errcode);
  204. return errcode;
  205. }
  206. sr->session_id = session_id;
  207. sr->ep_stat = MSP_EP_LOOKING_FOR_SPEECH;
  208. sr->rec_stat = MSP_REC_STATUS_SUCCESS;
  209. sr->audio_status = MSP_AUDIO_SAMPLE_FIRST;
  210. if (sr->aud_src == SR_MIC) {
  211. ret = start_record(sr->recorder);
  212. if (ret != 0) {
  213. sr_dbg("start record failed: %d\n", ret);
  214. QISRSessionEnd(session_id, "start record fail");
  215. sr->session_id = NULL;
  216. return -E_SR_RECORDFAIL;
  217. }
  218. }
  219. sr->state = SR_STATE_STARTED;
  220. if (sr->notif.on_speech_begin)
  221. sr->notif.on_speech_begin();
  222. return 0;
  223. }
  224. /* after stop_record, there are still some data callbacks */
  225. static void wait_for_rec_stop(struct recorder *rec, unsigned int timeout_ms)
  226. {
  227. while (!is_record_stopped(rec)) {
  228. Sleep(1);
  229. if (timeout_ms != (unsigned int)-1)
  230. if (0 == timeout_ms--)
  231. break;
  232. }
  233. }
  234. int sr_stop_listening(struct speech_rec *sr)
  235. {
  236. int ret = 0;
  237. const char * rslt = NULL;
  238. if (sr->state < SR_STATE_STARTED) {
  239. sr_dbg("Not started or already stopped.\n");
  240. return 0;
  241. }
  242. if (sr->aud_src == SR_MIC) {
  243. ret = stop_record(sr->recorder);
  244. if (ret != 0) {
  245. sr_dbg("Stop failed! \n");
  246. return -E_SR_RECORDFAIL;
  247. }
  248. wait_for_rec_stop(sr->recorder, (unsigned int)-1);
  249. }
  250. sr->state = SR_STATE_INIT;
  251. ret = QISRAudioWrite(sr->session_id, NULL, 0, MSP_AUDIO_SAMPLE_LAST, &sr->ep_stat, &sr->rec_stat);
  252. if (ret != 0) {
  253. sr_dbg("write LAST_SAMPLE failed: %d\n", ret);
  254. QISRSessionEnd(sr->session_id, "write err");
  255. return ret;
  256. }
  257. sr->rec_stat = 2;
  258. while (sr->rec_stat != MSP_REC_STATUS_COMPLETE) {
  259. rslt = QISRGetResult(sr->session_id, &sr->rec_stat, 0, &ret);
  260. if (MSP_SUCCESS != ret) {
  261. sr_dbg("\nQISRGetResult failed! error code: %d\n", ret);
  262. end_sr_on_error(sr, ret);
  263. return ret;
  264. }
  265. if (NULL != rslt && sr->notif.on_result)
  266. sr->notif.on_result(rslt, sr->rec_stat == MSP_REC_STATUS_COMPLETE ? 1 : 0);
  267. Sleep(100);
  268. }
  269. QISRSessionEnd(sr->session_id, "normal");
  270. sr->session_id = NULL;
  271. return 0;
  272. }
  273. int sr_write_audio_data(struct speech_rec *sr, char *data, unsigned int len)
  274. {
  275. const char *rslt = NULL;
  276. int ret = 0;
  277. if (!sr )
  278. return -E_SR_INVAL;
  279. if (!data || !len)
  280. return 0;
  281. ret = QISRAudioWrite(sr->session_id, data, len, sr->audio_status, &sr->ep_stat, &sr->rec_stat);
  282. if (ret) {
  283. end_sr_on_error(sr, ret);
  284. return ret;
  285. }
  286. sr->audio_status = MSP_AUDIO_SAMPLE_CONTINUE;
  287. if (MSP_REC_STATUS_SUCCESS == sr->rec_stat) { //已经有部分听写结果
  288. rslt = QISRGetResult(sr->session_id, &sr->rec_stat, 0, &ret);
  289. if (MSP_SUCCESS != ret) {
  290. sr_dbg("\nQISRGetResult failed! error code: %d\n", ret);
  291. end_sr_on_error(sr, ret);
  292. return ret;
  293. }
  294. if (NULL != rslt && sr->notif.on_result)
  295. sr->notif.on_result(rslt, sr->rec_stat == MSP_REC_STATUS_COMPLETE ? 1 : 0);
  296. }
  297. if (MSP_EP_AFTER_SPEECH == sr->ep_stat)
  298. end_sr_on_vad(sr);
  299. return 0;
  300. }
  301. void sr_uninit(struct speech_rec * sr)
  302. {
  303. if (sr->recorder) {
  304. if(!is_record_stopped(sr->recorder))
  305. stop_record(sr->recorder);
  306. close_recorder(sr->recorder);
  307. destroy_recorder(sr->recorder);
  308. sr->recorder = NULL;
  309. }
  310. if (sr->session_begin_params) {
  311. SR_MFREE(sr->session_begin_params);
  312. sr->session_begin_params = NULL;
  313. }
  314. }