tts.c 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175
  1. /*
  2. * 语音合成(Text To Speech,TTS)技术能够自动将任意文字实时转换为连续的
  3. * 自然语音,是一种能够在任何时间、任何地点,向任何人提供语音信息服务的
  4. * 高效便捷手段,非常符合信息时代海量数据、动态更新和个性化查询的需求。
  5. */
  6. #include <stdlib.h>
  7. #include <stdio.h>
  8. #include <unistd.h>
  9. #include <errno.h>
  10. #include <string.h>
  11. #include "qtts.h"
  12. #include "msp_cmn.h"
  13. #include "msp_errors.h"
  /* Integer aliases; neither is referenced in the visible part of this file. */
  typedef int   SR_DWORD;
  typedef short SR_WORD;
  /*
   * On-disk header of a PCM WAV file.
   *
   * The struct is written to the file verbatim, so its in-memory layout is the
   * file layout: 44 bytes on platforms where int is 32 bits, short is 16 bits
   * and no padding is inserted between the fields.
   */
  typedef struct _wave_pcm_hdr
  {
      /* RIFF chunk descriptor */
      char  riff[4];            /* "RIFF" */
      int   size_8;             /* file size minus 8 */
      char  wave[4];            /* "WAVE" */

      /* format sub-chunk */
      char  fmt[4];             /* "fmt " (note the trailing space) */
      int   fmt_size;           /* size of the format fields that follow: 16 */
      short format_tag;         /* audio format, 1 = PCM */
      short channels;           /* number of channels, 1 = mono */
      int   samples_per_sec;    /* sample rate in Hz, e.g. 8000, 11025, 16000 */
      int   avg_bytes_per_sec;  /* samples_per_sec * channels * bits_per_sample / 8 */
      short block_align;        /* bytes per sample frame: channels * bits_per_sample / 8 */
      short bits_per_sample;    /* quantization depth: 8 or 16 */

      /* data sub-chunk */
      char  data[4];            /* "data" */
      int   data_size;          /* length of the raw audio payload: file size minus 44 */
  } wave_pcm_hdr;
  33. /* 默认wav音频头部数据 */
  34. wave_pcm_hdr default_wav_hdr =
  35. {
  36. { 'R', 'I', 'F', 'F' },
  37. 0,
  38. {'W', 'A', 'V', 'E'},
  39. {'f', 'm', 't', ' '},
  40. 16,
  41. 1,
  42. 1,
  43. 16000,
  44. 32000,
  45. 2,
  46. 16,
  47. {'d', 'a', 't', 'a'},
  48. 0
  49. };
  50. /* 文本合成 */
  51. int text_to_speech(const char* src_text, const char* des_path, const char* params)
  52. {
  53. int ret = -1;
  54. FILE* fp = NULL;
  55. const char* sessionID = NULL;
  56. unsigned int audio_len = 0;
  57. wave_pcm_hdr wav_hdr = default_wav_hdr;
  58. int synth_status = MSP_TTS_FLAG_STILL_HAVE_DATA;
  59. if (NULL == src_text || NULL == des_path)
  60. {
  61. printf("params is error!\n");
  62. return ret;
  63. }
  64. fp = fopen(des_path, "wb");
  65. if (NULL == fp)
  66. {
  67. printf("open %s error.\n", des_path);
  68. return ret;
  69. }
  70. /* 开始合成 */
  71. sessionID = QTTSSessionBegin(params, &ret);
  72. if (MSP_SUCCESS != ret)
  73. {
  74. printf("QTTSSessionBegin failed, error code: %d.\n", ret);
  75. fclose(fp);
  76. return ret;
  77. }
  78. ret = QTTSTextPut(sessionID, src_text, (unsigned int)strlen(src_text), NULL);
  79. if (MSP_SUCCESS != ret)
  80. {
  81. printf("QTTSTextPut failed, error code: %d.\n",ret);
  82. QTTSSessionEnd(sessionID, "TextPutError");
  83. fclose(fp);
  84. return ret;
  85. }
  86. printf("正在合成:%s\n", src_text);
  87. fwrite(&wav_hdr, sizeof(wav_hdr) ,1, fp); //添加wav音频头,使用采样率为16000
  88. while (1)
  89. {
  90. /* 获取合成音频 */
  91. const void* data = QTTSAudioGet(sessionID, &audio_len, &synth_status, &ret);
  92. if (MSP_SUCCESS != ret)
  93. break;
  94. if (NULL != data)
  95. {
  96. fwrite(data, audio_len, 1, fp);
  97. wav_hdr.data_size += audio_len; //计算data_size大小
  98. }
  99. if (MSP_TTS_FLAG_DATA_END == synth_status)
  100. break;
  101. }
  102. if (MSP_SUCCESS != ret)
  103. {
  104. printf("QTTSAudioGet failed, error code: %d.\n",ret);
  105. QTTSSessionEnd(sessionID, "AudioGetError");
  106. fclose(fp);
  107. return ret;
  108. }
  109. /* 修正wav文件头数据的大小 */
  110. wav_hdr.size_8 += wav_hdr.data_size + (sizeof(wav_hdr) - 8);
  111. /* 将修正过的数据写回文件头部,音频文件为wav格式 */
  112. fseek(fp, 4, 0);
  113. fwrite(&wav_hdr.size_8,sizeof(wav_hdr.size_8), 1, fp); //写入size_8的值
  114. fseek(fp, 40, 0); //将文件指针偏移到存储data_size值的位置
  115. fwrite(&wav_hdr.data_size,sizeof(wav_hdr.data_size), 1, fp); //写入data_size的值
  116. fclose(fp);
  117. fp = NULL;
  118. /* 合成完毕 */
  119. ret = QTTSSessionEnd(sessionID, "Normal");
  120. if (MSP_SUCCESS != ret)
  121. {
  122. printf("QTTSSessionEnd failed, error code: %d.\n",ret);
  123. }
  124. return ret;
  125. }
  126. int startTTS(char *text)
  127. {
  128. int ret = MSP_SUCCESS;
  129. const char* login_params = "appid = 5d5b9efd, work_dir = .";//登录参数,appid与msc库绑定,请勿随意改动
  130. /*
  131. * rdn: 合成音频数字发音方式
  132. * volume: 合成音频的音量
  133. * pitch: 合成音频的音调
  134. * speed: 合成音频对应的语速
  135. * voice_name: 合成发音人
  136. * sample_rate: 合成音频采样率
  137. * text_encoding: 合成文本编码格式
  138. */
  139. const char* session_begin_params = "engine_type = local,voice_name=xiaofeng, text_encoding = UTF8, tts_res_path = fo|res/tts/xiaofeng.jet;fo|res/tts/common.jet, sample_rate = 16000, speed = 50, volume = 50, pitch = 50, rdn = 0, effect= 3";
  140. const char* filename = "tts_sample.wav"; //合成的语音文件名称
  141. /* 用户登录 */
  142. ret = MSPLogin(NULL, NULL, login_params); //第一个参数是用户名,第二个参数是密码,第三个参数是登录参数,用户名和密码可在http://www.xfyun.cn注册获取
  143. if (MSP_SUCCESS != ret)
  144. {
  145. printf("TTS MSPLogin failed, error code: %d.\n", ret);
  146. goto exit; //登录失败,退出登录
  147. }
  148. /*语音合成*/
  149. ret = text_to_speech(text, filename, session_begin_params);
  150. if (MSP_SUCCESS != ret)
  151. {
  152. printf("text_to_speech failed, error code: %d.\n", ret);
  153. }
  154. printf("合成完毕,开始播放合成语音...\n");
  155. system("play -q --multi-threaded tts_sample.wav");
  156. exit:
  157. MSPLogout(); //退出登录
  158. return 0;
  159. }