pyaudio - "Listen" until voice is detected and then record to a .wav file
Solution 1
Look here:
https://github.com/jeysonmc/python-google-speech-scripts/blob/master/stt_google.py
It even converts WAV to FLAC and sends it to the Google Speech API; just delete the stt_google_wav function if you don't need it ;)
Solution 2
Having spent some time on it, I've come up with the following code that seems to be doing what you need, except writing to file:
import sys
import threading
from array import array

try:
    from Queue import Queue, Full, Empty  # Python 2 module name
except ImportError:
    from queue import Queue, Full, Empty  # Python 3 module name

import pyaudio
# Number of frames requested from the input stream on every read.
CHUNK_SIZE = 1024
# Peak sample value at or above which a chunk is reported as loud.
MIN_VOLUME = 500
# Buffer budget between the listener and the recorder. Divided by CHUNK_SIZE
# it gives the queue capacity; when the recorder cannot keep up and the queue
# fills, the listener is meant to drop chunks rather than fall behind.
BUF_MAX_SIZE = 10 * CHUNK_SIZE
def main():
    """Start the listener and recorder threads and supervise them.

    A bounded queue connects the two workers. The main thread waits until
    Ctrl-C is pressed or until either worker exits, then sets the shared stop
    event and gives both workers a bounded amount of time to finish.
    """
    stopped = threading.Event()
    # Keep at least one slot: Queue(maxsize=0) would mean "unbounded".
    q = Queue(maxsize=max(1, BUF_MAX_SIZE // CHUNK_SIZE))

    workers = [
        threading.Thread(target=listen, args=(stopped, q)),
        threading.Thread(target=record, args=(stopped, q)),
    ]
    for worker in workers:
        # Daemon threads cannot keep the process alive if a worker is stuck
        # in a blocking call and never sees the stop event.
        worker.daemon = True
        worker.start()

    try:
        # Short timed joins let the main thread wake up regularly so that
        # KeyboardInterrupt is delivered. Stop supervising as soon as either
        # worker has died instead of spinning forever.
        while all(worker.is_alive() for worker in workers):
            for worker in workers:
                worker.join(0.1)
    except KeyboardInterrupt:
        pass
    finally:
        stopped.set()
        for worker in workers:
            # Bounded wait: an untimed join could hang on a blocked worker.
            worker.join(1.0)
def record(stopped, q):
    """Consume audio chunks from ``q`` and print a loudness marker for each.

    For every chunk taken from the queue the peak sample value is compared
    with ``MIN_VOLUME``: ``O`` is printed when the peak is at or above the
    threshold and ``-`` otherwise. Runs until ``stopped`` is set.

    Args:
        stopped: ``threading.Event`` signalling that the thread should exit.
        q: queue of sample sequences produced by the listener thread.
    """
    while not stopped.is_set():
        try:
            # Bounded wait: a bare q.get() blocks forever once the producer
            # has stopped, so the stop event would never be checked again.
            chunk = q.get(timeout=0.1)
        except Empty:
            continue
        # An empty chunk has no peak; treat it as silence instead of letting
        # max() raise ValueError.
        vol = max(chunk) if chunk else 0
        if vol >= MIN_VOLUME:
            # TODO: write to file
            sys.stdout.write("O ")
        else:
            sys.stdout.write("- ")
        # No newline is written, so flush to make the marker show up now.
        sys.stdout.flush()
def listen(stopped, q):
    """Read audio from the default input device and push it onto ``q``.

    Opens a 16-bit, 2-channel, 44100 Hz input stream and reads ``CHUNK_SIZE``
    frames at a time until ``stopped`` is set. Each read is converted to an
    ``array('h')`` of samples and offered to the queue; when the queue is
    full the chunk is dropped so that reading from the device never stalls.
    The stream and the PyAudio instance are released on exit, including when
    a read raises.

    Args:
        stopped: ``threading.Event`` signalling that the thread should exit.
        q: bounded queue shared with the consumer thread.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=2,
            rate=44100,
            input=True,
            frames_per_buffer=CHUNK_SIZE,
        )
        try:
            while not stopped.is_set():
                samples = array('h', stream.read(CHUNK_SIZE))
                try:
                    # put() would block while the queue is full and never
                    # raise Full; put_nowait() raises immediately so the
                    # chunk can be dropped.
                    q.put_nowait(samples)
                except Full:
                    pass  # discard
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()
# Start the listener/recorder pair only when run as a script, not on import.
if __name__ == "__main__":
    main()
Comments
-
Phorce almost 4 years
I'm having some problems and I cannot seem to get my head around the concept.
What I am trying to do is this:
Have the microphone "listen" for voice (a signal above a particular threshold) and then start recording to a .wav file until the person has stopped speaking / the signal is no longer there. For example:
begin: listen() -> nothing is being said listen() -> nothing is being said listen() -> VOICED - _BEGIN RECORDING_ listen() -> VOICED - _BEGIN RECORDING_ listen() -> UNVOICED - _END RECORDING_ end
I also want to do this using "threading", so that one thread is created that "listens" to the file constantly, and another thread begins when there is voiced data. But I cannot for the life of me figure out how I should go about it. Here is my code so far:
# Asker's work-in-progress recorder class. The paste had lost its line breaks;
# the layout below is reconstructed from the syntax, so the nesting of the last
# statements in listen() is a best guess -- TODO confirm against the original.
import wave
import sys
import threading
from array import array
from sys import byteorder

try:
    import pyaudio
    CHECK_PYLIB = True
except ImportError:
    CHECK_PYLIB = False


class Audio:
    """Wrapper around a PyAudio input stream that reads samples on a thread."""

    # Class-level defaults. NOTE(review): __init__ assigns `format`,
    # `channels` and `rate` without the leading underscore, so `_format`,
    # `_channels` and `_rate` are never overwritten on an instance.
    _chunk = 0.0
    _format = 0.0
    _channels = 0.0
    _rate = 0.0
    record_for = 0.0
    stream = None
    p = None
    sample_width = None
    THRESHOLD = 500

    # initial constructor to accept params
    def __init__(self, chunk, format, channels, rate):
        #### set data-types
        self._chunk = chunk
        # NOTE(review): the trailing comma makes this a 1-tuple, and the
        # `format` argument is not used.
        self.format = pyaudio.paInt16,
        self.channels = channels
        self.rate = rate
        self.p = pyaudio.PyAudio();

    def open(self):
        """Open the input stream with hard-coded settings and return True."""
        # print "opened"
        # NOTE(review): the values stored by __init__ are not used here.
        self.stream = self.p.open(format=pyaudio.paInt16, channels=2, rate=44100, input=True, frames_per_buffer=1024);
        return True

    def record(self):
        """Start listen() on a new thread; the thread object is not kept."""
        # create a new instance/thread to record the sound
        threading.Thread(target=self.listen).start();

    # NOTE(review): there is no `self` parameter, so calling this on an
    # instance passes the instance as `snd_data`. The bare name THRESHOLD is
    # looked up as a global, not as the class attribute above; no such global
    # is visible in this snippet.
    def is_silence(snd_data):
        return max(snd_data) < THRESHOLD

    def listen(self):
        """Read chunks from the stream and append them to one sample array."""
        r = array('h')
        # NOTE(review): the loop has no break, so the return below is never
        # reached; is_silence() is never called to decide when to stop.
        while True:
            snd_data = array('h', self.stream.read(self._chunk))
            if byteorder == 'big':
                snd_data.byteswap()
            r.extend(snd_data)
        # NOTE(review): the bare name `sample_width` is not defined in this
        # scope; the class attribute would be `self.sample_width`.
        return sample_width, r
I'm guessing that I could record "5" second blocks and then, if a block is deemed "voiced", the thread should be started and run until all the voice data has been captured. However, because at present it's at
while True:
I don't want to capture all of the audio up until there are voiced commands. For example, given "no voice", "no voice", "voice", "voice", "no voice", "no voice", I just want the "voice" parts inside the wav file. Does anyone have any suggestions? Thank you.
EDIT:
# Asker's second attempt: a listener thread that classifies chunks as loud or
# quiet, plus a placeholder "process" thread. The paste had lost its line
# breaks; the layout below is reconstructed from the syntax.
import wave
import sys
import time
import threading
from array import array
from sys import byteorder
from Queue import Queue, Full
import pyaudio

CHUNK_SIZE = 1024
MIN_VOLUME = 500
BUF_MAX_SIZE = 1024 * 10
process_g = 0


def main():
    """Start the listen and process threads and wait for Ctrl-C."""
    stopped = threading.Event()
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))
    listen_t = threading.Thread(target=listen, args=(stopped, q))
    listen_t.start()
    # NOTE(review): there is no `global process_g` statement, so this binds a
    # local name; the module-level process_g stays 0.
    process_g = threading.Thread(target=process, args=(stopped, q))
    process_g.start()
    try:
        while True:
            listen_t.join(0.1)
            process_g.join(0.1)
    except KeyboardInterrupt:
        stopped.set()
    listen_t.join()
    process_g.join()


def process(stopped, q):
    """Print a message, sleep, and repeat until the stop event is set."""
    while True:
        if stopped.wait(timeout = 0):
            break
        print "I'm processing.."
        # NOTE(review): this sleeps 300 seconds; the stop event is only
        # checked between sleeps, and nothing is read from q.
        time.sleep(300)


def listen(stopped, q):
    """Read batches of chunks from the input stream and print their loudness."""
    stream = pyaudio.PyAudio().open(
        format = pyaudio.paInt16,
        channels = 2,
        rate = 44100,
        input = True,
        frames_per_buffer = 1024
    )
    while True:
        if stopped and stopped.wait(timeout=0):
            break
        try:
            # Prints the module-level value (0), see the note in main().
            print process_g
            # With Python 2 integer division this is 43 * 5 = 215 reads of
            # CHUNK_SIZE frames, i.e. roughly five seconds at 44100 Hz.
            for i in range(0, int(44100 / 1024 * 5)):
                data_chunk = array('h', stream.read(CHUNK_SIZE))
                vol = max(data_chunk)
                if(vol >= MIN_VOLUME):
                    print "WORDS.."
                else:
                    print "Nothing.."
        # NOTE(review): nothing visible in the try body puts onto q, so Full
        # does not appear to be raisable here.
        except Full:
            pass


if __name__ == '__main__':
    main()
Now, after every 5 seconds, I need the "process" function to execute and process the data (calling time.sleep(10) whilst it does this), and then start the recording back up.