#!/usr/bin/env python
# snowboydecoder.py
import collections
import logging
import os
import threading
import time
import wave

import pyaudio

from . import snowboydetect
  9. logging.basicConfig()
  10. logger = logging.getLogger("snowboy")
  11. logger.setLevel(logging.INFO)
  12. TOP_DIR = os.path.dirname(os.path.abspath(__file__))
  13. RESOURCE_FILE = os.path.join(TOP_DIR, "resources/common.res")
  14. DETECT_DING = os.path.join(TOP_DIR, "resources/ding.wav")
  15. DETECT_DONG = os.path.join(TOP_DIR, "resources/dong.wav")
  16. class RingBuffer(object):
  17. """Ring buffer to hold audio from PortAudio"""
  18. def __init__(self, size=4096):
  19. self._buf = collections.deque(maxlen=size)
  20. def extend(self, data):
  21. """Adds data to the end of buffer"""
  22. self._buf.extend(data)
  23. def get(self):
  24. """Retrieves data from the beginning of buffer and clears it"""
  25. tmp = bytes(bytearray(self._buf))
  26. self._buf.clear()
  27. return tmp
  28. def play_audio_file(fname=DETECT_DING):
  29. """Simple callback function to play a wave file. By default it plays
  30. a Ding sound.
  31. :param str fname: wave file name
  32. :return: None
  33. """
  34. ding_wav = wave.open(fname, 'rb')
  35. ding_data = ding_wav.readframes(ding_wav.getnframes())
  36. audio = pyaudio.PyAudio()
  37. stream_out = audio.open(
  38. format=audio.get_format_from_width(ding_wav.getsampwidth()),
  39. channels=ding_wav.getnchannels(),
  40. rate=ding_wav.getframerate(), input=False, output=True)
  41. stream_out.start_stream()
  42. stream_out.write(ding_data)
  43. time.sleep(0.2)
  44. stream_out.stop_stream()
  45. stream_out.close()
  46. audio.terminate()
  47. class HotwordDetector(object):
  48. """
  49. Snowboy decoder to detect whether a keyword specified by `decoder_model`
  50. exists in a microphone input stream.
  51. :param decoder_model: decoder model file path, a string or a list of strings
  52. :param resource: resource file path.
  53. :param sensitivity: decoder sensitivity, a float of a list of floats.
  54. The bigger the value, the more senstive the
  55. decoder. If an empty list is provided, then the
  56. default sensitivity in the model will be used.
  57. :param audio_gain: multiply input volume by this factor.
  58. """
  59. def __init__(self, decoder_model,
  60. resource=RESOURCE_FILE,
  61. sensitivity=[],
  62. audio_gain=1):
  63. tm = type(decoder_model)
  64. ts = type(sensitivity)
  65. if tm is not list:
  66. decoder_model = [decoder_model]
  67. if ts is not list:
  68. sensitivity = [sensitivity]
  69. model_str = ",".join(decoder_model)
  70. self.detector = snowboydetect.SnowboyDetect(
  71. resource_filename=resource.encode(), model_str=model_str.encode())
  72. self.detector.SetAudioGain(audio_gain)
  73. self.num_hotwords = self.detector.NumHotwords()
  74. if len(decoder_model) > 1 and len(sensitivity) == 1:
  75. sensitivity = sensitivity * self.num_hotwords
  76. if len(sensitivity) != 0:
  77. assert self.num_hotwords == len(sensitivity), \
  78. "number of hotwords in decoder_model (%d) and sensitivity " \
  79. "(%d) does not match" % (self.num_hotwords, len(sensitivity))
  80. sensitivity_str = ",".join([str(t) for t in sensitivity])
  81. if len(sensitivity) != 0:
  82. self.detector.SetSensitivity(sensitivity_str.encode())
  83. self.ring_buffer = RingBuffer(
  84. self.detector.NumChannels() * self.detector.SampleRate() * 5)
  85. def start(self, detected_callback=play_audio_file,
  86. interrupt_check=lambda: False,
  87. sleep_time=0.03,
  88. audio_recorder_callback=None,
  89. silent_count_threshold=15,
  90. recording_timeout=100):
  91. """
  92. Start the voice detector. For every `sleep_time` second it checks the
  93. audio buffer for triggering keywords. If detected, then call
  94. corresponding function in `detected_callback`, which can be a single
  95. function (single model) or a list of callback functions (multiple
  96. models). Every loop it also calls `interrupt_check` -- if it returns
  97. True, then breaks from the loop and return.
  98. :param detected_callback: a function or list of functions. The number of
  99. items must match the number of models in
  100. `decoder_model`.
  101. :param interrupt_check: a function that returns True if the main loop
  102. needs to stop.
  103. :param float sleep_time: how much time in second every loop waits.
  104. :param audio_recorder_callback: if specified, this will be called after
  105. a keyword has been spoken and after the
  106. phrase immediately after the keyword has
  107. been recorded. The function will be
  108. passed the name of the file where the
  109. phrase was recorded.
  110. :param silent_count_threshold: indicates how long silence must be heard
  111. to mark the end of a phrase that is
  112. being recorded.
  113. :param recording_timeout: limits the maximum length of a recording.
  114. :return: None
  115. """
  116. self._running = True
  117. def audio_callback(in_data, frame_count, time_info, status):
  118. self.ring_buffer.extend(in_data)
  119. play_data = chr(0) * len(in_data)
  120. return play_data, pyaudio.paContinue
  121. self.audio = pyaudio.PyAudio()
  122. self.stream_in = self.audio.open(
  123. input=True, output=False,
  124. format=self.audio.get_format_from_width(
  125. self.detector.BitsPerSample() / 8),
  126. channels=self.detector.NumChannels(),
  127. rate=self.detector.SampleRate(),
  128. frames_per_buffer=2048,
  129. stream_callback=audio_callback)
  130. if interrupt_check():
  131. logger.debug("detect voice return")
  132. return
  133. tc = type(detected_callback)
  134. if tc is not list:
  135. detected_callback = [detected_callback]
  136. if len(detected_callback) == 1 and self.num_hotwords > 1:
  137. detected_callback *= self.num_hotwords
  138. assert self.num_hotwords == len(detected_callback), \
  139. "Error: hotwords in your models (%d) do not match the number of " \
  140. "callbacks (%d)" % (self.num_hotwords, len(detected_callback))
  141. logger.debug("detecting...")
  142. state = "PASSIVE"
  143. while self._running is True:
  144. if interrupt_check():
  145. logger.debug("detect voice break")
  146. break
  147. data = self.ring_buffer.get()
  148. if len(data) == 0:
  149. time.sleep(sleep_time)
  150. continue
  151. status = self.detector.RunDetection(data)
  152. if status == -1:
  153. logger.warning("Error initializing streams or reading audio data")
  154. #small state machine to handle recording of phrase after keyword
  155. if state == "PASSIVE":
  156. if status > 0: #key word found
  157. self.recordedData = []
  158. self.recordedData.append(data)
  159. silentCount = 0
  160. recordingCount = 0
  161. message = "Keyword " + str(status) + " detected at time: "
  162. message += time.strftime("%Y-%m-%d %H:%M:%S",
  163. time.localtime(time.time()))
  164. logger.info(message)
  165. callback = detected_callback[status-1]
  166. if callback is not None:
  167. callback()
  168. if audio_recorder_callback is not None:
  169. state = "ACTIVE"
  170. continue
  171. elif state == "ACTIVE":
  172. stopRecording = False
  173. if recordingCount > recording_timeout:
  174. stopRecording = True
  175. elif status == -2: #silence found
  176. if silentCount > silent_count_threshold:
  177. stopRecording = True
  178. else:
  179. silentCount = silentCount + 1
  180. elif status == 0: #voice found
  181. silentCount = 0
  182. if stopRecording == True:
  183. fname = self.saveMessage()
  184. audio_recorder_callback(fname)
  185. state = "PASSIVE"
  186. continue
  187. recordingCount = recordingCount + 1
  188. self.recordedData.append(data)
  189. logger.debug("finished.")
  190. def saveMessage(self):
  191. """
  192. Save the message stored in self.recordedData to a timestamped file.
  193. """
  194. filename = 'output' + str(int(time.time())) + '.wav'
  195. data = b''.join(self.recordedData)
  196. #use wave to save data
  197. wf = wave.open(filename, 'wb')
  198. wf.setnchannels(1)
  199. wf.setsampwidth(self.audio.get_sample_size(
  200. self.audio.get_format_from_width(
  201. self.detector.BitsPerSample() / 8)))
  202. wf.setframerate(self.detector.SampleRate())
  203. wf.writeframes(data)
  204. wf.close()
  205. logger.debug("finished saving: " + filename)
  206. return filename
  207. def terminate(self):
  208. """
  209. Terminate audio stream. Users can call start() again to detect.
  210. :return: None
  211. """
  212. self.stream_in.stop_stream()
  213. self.stream_in.close()
  214. self.audio.terminate()
  215. self._running = False