pyaudio - "Listen" until voice is detected and then record to a .wav file

24,591

Solution 1

Look here:

https://github.com/jeysonmc/python-google-speech-scripts/blob/master/stt_google.py

It even converts WAV to FLAC and sends it to the Google Speech API; just delete the stt_google_wav function if you don't need it ;)

Solution 2

Having spent some time on it, I've come up with the following code that seems to be doing what you need, except writing to file:

import threading
from array import array
from Queue import Empty, Full, Queue

import pyaudio


# Number of frames requested from the input stream per read (see listen()).
CHUNK_SIZE = 1024
# A chunk whose largest sample is at or above this value is reported as
# sound by record(); anything lower is reported as silence.
MIN_VOLUME = 500
# Buffer budget: the intent is that, if the recording thread can't consume
# fast enough, the listener will start discarding. main() divides this by
# CHUNK_SIZE to obtain the queue capacity in chunks.
BUF_MAX_SIZE = CHUNK_SIZE * 10


def main():
    """Start the listener and recorder threads and wait for them to finish.

    A shared ``threading.Event`` is used as the stop flag and a bounded
    ``Queue`` carries audio chunks from ``listen`` to ``record``.  The
    function returns after both worker threads have been joined.  Shutdown
    is triggered by Ctrl-C (``KeyboardInterrupt``) or by either worker
    exiting on its own.
    """
    stopped = threading.Event()
    # Queue capacity expressed in chunks rather than in samples.
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))

    listen_t = threading.Thread(target=listen, args=(stopped, q))
    listen_t.start()
    record_t = threading.Thread(target=record, args=(stopped, q))
    record_t.start()

    try:
        # Poll with short timed joins so KeyboardInterrupt can be caught
        # here.  Only keep polling while both workers are alive: join() on a
        # finished thread returns immediately, so an unconditional
        # `while True` degenerates into a busy loop once the workers exit.
        while listen_t.is_alive() and record_t.is_alive():
            listen_t.join(0.1)
            record_t.join(0.1)
    except KeyboardInterrupt:
        pass  # Ctrl-C is the expected way to stop; fall through to shutdown
    finally:
        # Always raise the stop flag, so that a worker that is still running
        # is told to exit when its peer has died or the user interrupted.
        stopped.set()

    listen_t.join()
    record_t.join()


def record(stopped, q):
    while True:
        if stopped.wait(timeout=0):
            break
        chunk = q.get()
        vol = max(chunk)
        if vol >= MIN_VOLUME:
            # TODO: write to file
            print "O",
        else:
            print "-",


def listen(stopped, q):
    """Read audio from the default input device and push chunks onto *q*.

    Args:
        stopped: ``threading.Event`` that is set to request shutdown.
        q: bounded queue receiving one ``array('h')`` per read.

    Opens a 16-bit, 2-channel, 44100 Hz input stream and reads
    ``CHUNK_SIZE`` frames at a time until *stopped* is set.  When the queue
    is full the chunk is dropped instead of stalling the reader.  The stream
    and the PyAudio instance are released before returning, including when
    an exception propagates.
    """
    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(
            format=pyaudio.paInt16,
            channels=2,
            rate=44100,
            input=True,
            frames_per_buffer=CHUNK_SIZE,
        )
        try:
            while not stopped.is_set():
                chunk = array('h', stream.read(CHUNK_SIZE))
                try:
                    # Non-blocking put: the default q.put() blocks until a
                    # slot is free and therefore never raises Full, which
                    # would stall the reader instead of discarding.
                    q.put_nowait(chunk)
                except Full:
                    pass  # discard
        finally:
            stream.stop_stream()
            stream.close()
    finally:
        audio.terminate()


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Share:
24,591
Phorce
Author by

Phorce

C++ addict in my spare time, which is hardly ever.

Updated on October 05, 2020

Comments

  • Phorce
    Phorce almost 4 years

    I'm having some problems and I cannot seem to get my head around the concept.

    What I am trying to do is this:

    Have the microphone "listen" for voiced audio (above a particular threshold) and then start recording to a .wav file until the person has stopped speaking / the signal is no longer there. For example:

    begin:
       listen() -> nothing is being said
       listen() -> nothing is being said
       listen() -> VOICED - _BEGIN RECORDING_
       listen() -> VOICED - _BEGIN RECORDING_
       listen() -> UNVOICED - _END RECORDING_
    end
    

    I also want to do this using "threading", so a thread would be created that "listens" to the file constantly, and another thread will begin when there is voiced data. But I cannot for the life of me figure out how I should go about it. Here is my code so far:

    import wave
    import sys
    import threading
    from array import array
    from sys import byteorder
    
    try:
        import pyaudio
        CHECK_PYLIB = True
    except ImportError:
        CHECK_PYLIB = False
    
    # Work-in-progress wrapper around a PyAudio input stream: __init__ creates
    # the PyAudio instance, open() opens an input stream, record() starts
    # listen() on a new thread, and listen() accumulates samples.
    class Audio:
        # Class-level defaults. NOTE(review): __init__ assigns self.format,
        # self.channels and self.rate (no leading underscore), so _format,
        # _channels and _rate keep these default values.
        _chunk = 0.0
        _format = 0.0
        _channels = 0.0
        _rate = 0.0
        record_for = 0.0
        stream = None
    
        p = None
    
        sample_width = None
        # Value that is_silence() compares the largest sample against.
        THRESHOLD = 500
    
        # Constructor: stores the stream parameters and creates the PyAudio instance.
        def __init__(self, chunk, format, channels, rate):
            #### set data-types
    
            self._chunk = chunk
            # NOTE(review): the trailing comma makes this a 1-tuple, and the
            # `format` argument is ignored in favour of pyaudio.paInt16.
            self.format = pyaudio.paInt16,
            self.channels = channels
            self.rate = rate
    
            self.p = pyaudio.PyAudio();
    
       # NOTE(review): from here on the methods are indented 7 spaces while
       # __init__ uses 8, so the class does not parse as written.
       # Opens the input stream with hard-coded settings; the values stored by
       # __init__ are not used here.
       def open(self):
           # print "opened"
           self.stream = self.p.open(format=pyaudio.paInt16,
                                     channels=2,
                                     rate=44100,
                                     input=True,
                                     frames_per_buffer=1024);
           return True
    
       # Starts listen() on a new thread; the Thread object is not kept.
       def record(self):
           # create a new instance/thread to record the sound
           threading.Thread(target=self.listen).start();
    
       # NOTE(review): defined without `self`, and THRESHOLD is referenced as a
       # bare name although it is only defined as a class attribute.
       def is_silence(snd_data):
           return max(snd_data) < THRESHOLD
    
       # Reads self._chunk frames at a time and appends them to `r`.
       def listen(self):
           r = array('h')
    
           # NOTE(review): the loop has no break, so `r` grows without bound
           # and the return statement below is never reached.
           while True:
               snd_data = array('h', self.stream.read(self._chunk))
    
               # Byte-swap each chunk on big-endian hosts.
               if byteorder == 'big':
                   snd_data.byteswap()
               r.extend(snd_data)
    
           # NOTE(review): `sample_width` is a bare name here; only the class
           # attribute of that name (None) is defined.
           return sample_width, r
    

    I'm guessing that I could record "5" second blocks, and then, if a block is deemed "voiced", the thread should be started and run until all the voice data has been captured. However, because at present it's a while True: loop, everything gets captured, and I don't want to capture all of the audio up until there are voiced commands. So for e.g. "no voice", "no voice", "voice", "voice", "no voice", "no voice", I just want the "voice" parts inside the wav file. Does anyone have any suggestions?

    Thank you

    EDIT:

    import wave
    import sys
    import time 
    import threading 
    from array import array
    from sys import byteorder
    from Queue import Queue, Full
    
    import pyaudio 
    
    # Frames read from the stream per call in listen().
    CHUNK_SIZE = 1024
    # Largest-sample threshold used by listen() to tell "WORDS.." from "Nothing..".
    MIN_VOLUME = 500
    
    # Divided by CHUNK_SIZE in main() to get the queue capacity (10 chunks).
    BUF_MAX_SIZE = 1024 * 10 
    
    # Module-level placeholder that listen() prints.
    # NOTE(review): main() assigns process_g without a `global` statement, so
    # presumably this module-level value stays 0 - confirm intent.
    process_g = 0 
    
    # Starts the listener and processing threads, waits until Ctrl-C, then
    # sets the stop event and joins both threads.
    # NOTE(review): the statements below are at the same indentation as the
    # `def` line, so this does not parse as pasted; they are presumably meant
    # to be the function body.
    def main():
    
    stopped = threading.Event()
    
    # Queue capacity in chunks (BUF_MAX_SIZE / CHUNK_SIZE).
    q = Queue(maxsize=int(round(BUF_MAX_SIZE / CHUNK_SIZE)))
    
    listen_t = threading.Thread(target=listen, args=(stopped, q))
    
    listen_t.start()
    
    # NOTE(review): no `global process_g` here, so inside a function this
    # would bind a local name rather than the module-level process_g.
    process_g = threading.Thread(target=process, args=(stopped, q))
    
    process_g.start()
    
    try:
    
        # Timed joins in a loop; the loop only ends via KeyboardInterrupt.
        while True:
            listen_t.join(0.1)
            process_g.join(0.1)
    except KeyboardInterrupt:
            stopped.set()
    
    listen_t.join()
    process_g.join()
    
      # Placeholder worker: prints a message, then sleeps, until `stopped` is set.
      # `q` is accepted but not used.
      # NOTE(review): the body is at the same indentation as the `def` line, so
      # this does not parse as pasted.
      def process(stopped, q):
    
      while True:
        if stopped.wait(timeout = 0):
            break
        print "I'm processing.."
        # Sleeps 300 seconds, so the stop event is only re-checked every 5 minutes.
        time.sleep(300)
    
       # Opens a 16-bit, 2-channel, 44100 Hz input stream and, until `stopped`
       # is set, reads batches of chunks and prints whether each chunk's largest
       # sample reaches MIN_VOLUME. `q` is accepted but nothing is put on it.
       # NOTE(review): indentation is inconsistent throughout (body level with
       # the `def`, `except` deeper than its `try`), so this does not parse as
       # pasted.
       def listen(stopped, q):
    
       stream = pyaudio.PyAudio().open(
            format = pyaudio.paInt16,
            channels = 2,
            rate = 44100,
            input = True,
            frames_per_buffer = 1024    
                )
    
         while True:
    
          if stopped and stopped.wait(timeout=0):
              break
          try:
            # Prints the module-level process_g on every pass of the outer loop.
            print process_g
            # 215 reads of CHUNK_SIZE frames, i.e. roughly 5 seconds at 44100 Hz.
            for i in range(0, int(44100 / 1024 * 5)):
                data_chunk = array('h', stream.read(CHUNK_SIZE))
                vol = max(data_chunk)
                if(vol >= MIN_VOLUME):
                    print "WORDS.."
                else:
                    print "Nothing.."
    
            # NOTE(review): nothing in the try body puts onto the queue, so
            # Full does not appear to be raisable here.
            except Full:
                    pass 
    
        # Script entry point.
        # NOTE(review): main() is at the same indentation as the `if`, so this
        # does not parse as pasted.
        if __name__ == '__main__':
        main()
    

    Now, after every 5 seconds, I need the "process" function to execute and process the data (calling time.sleep(10) whilst it does this), and then start the recording back up.