[FEAT]: Add support for stream.read() to record silence from WASAPI speakers #28

@BungaaFACE

Description


What problem are you facing?

  • audio isn't recorded
  • audio is recorded with artifacts
  • problem with "silence"
  • other

What is the cause of the error (in your opinion)?

  • PyAudio/PortAudio bug
  • I just need help (or an answer)

I have created a stream from the speakers (WASAPI loopback) and am trying to read() from it. But when nothing is playing on the system, the code blocks on stream.read(1024) until I turn on music or some other sound source.

It would be great if you could add a parameter like fill_silence=True so that the code isn't blocked on stream.read().
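Until such a parameter exists, one possible workaround (a sketch, not part of the PyAudioWPatch API) is to poll the stream before reading and substitute zero-filled silence when no frames arrive within a timeout. `get_read_available()` and `read()` are real `pyaudio.Stream` methods; `read_or_silence`, `_SilentStream`, and all parameter names below are hypothetical names for illustration:

```python
import time

def read_or_silence(stream, frames, sample_width=2, channels=2, timeout=0.5):
    """Return `frames` frames from `stream`, or zero-filled silence if the
    stream does not buffer enough data within `timeout` seconds.
    Assumes `stream` exposes get_read_available() and read(), as
    pyaudio.Stream does."""
    deadline = time.monotonic() + timeout
    while stream.get_read_available() < frames:
        if time.monotonic() >= deadline:
            # Nothing is playing: hand back silence instead of blocking.
            return b"\x00" * (frames * sample_width * channels)
        time.sleep(0.01)
    return stream.read(frames, exception_on_overflow=False)

# Hypothetical stand-in for a loopback stream with no audio, just for a demo.
class _SilentStream:
    def get_read_available(self):
        return 0

silence = read_or_silence(_SilentStream(), 1024, sample_width=2, channels=2, timeout=0.05)
# 1024 frames * 2 bytes/sample * 2 channels of zeros
```

Feeding such silence chunks into the transcription loop keeps it running at a steady pace even when the speakers are idle.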

Here is the code:

import numpy as np
import pyaudiowpatch as pyaudio
from faster_whisper import WhisperModel

def get_stream(p: pyaudio.PyAudio, device='micro'):
    if device == 'micro':
        stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, output=True, frames_per_buffer=1024)
    else:
        wasapi_info = p.get_host_api_info_by_type(pyaudio.paWASAPI)
        default_speakers = p.get_device_info_by_index(wasapi_info["defaultOutputDevice"])

        if not default_speakers["isLoopbackDevice"]:
            for loopback in p.get_loopback_device_info_generator():
                if default_speakers["name"] in loopback["name"]:
                    default_speakers = loopback
                    break
            else:
                print("Default loopback output device not found.\n\nRun `python -m pyaudiowpatch` to check available devices.\nExiting...\n")
                return

        print(f"Recording from: ({default_speakers['index']}){default_speakers['name']}")
        stream = p.open(
            format=pyaudio.paInt16,
            channels=default_speakers["maxInputChannels"],
            rate=int(default_speakers["defaultSampleRate"]),
            frames_per_buffer=1024,
            input=True,
            input_device_index=default_speakers["index"]
        )
    return stream

def transcribe_chunk(p, stream, model: WhisperModel, chunk_length=4):
    frames = []
    for _ in range(0, int(stream._rate / stream._frames_per_buffer * chunk_length)):
        data = stream.read(stream._frames_per_buffer)
        frames.append(data)

    audio_data = b''.join(frames)
    np_audio = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) / 32768.0

    segments, info = model.transcribe(np_audio, beam_size=7)
    transcription = ' '.join(segment.text for segment in segments)
    return transcription

def main():
    model_size = "large"
    model = WhisperModel(model_size, compute_type='float16')  # device="cuda",

    p = pyaudio.PyAudio()
    stream = get_stream(p, device='speakers')  # any value other than 'micro' selects the WASAPI loopback branch

    accumulated_transcription = ''

    try:
        while True:
            transcription = transcribe_chunk(p, stream, model)
            print(transcription)
            accumulated_transcription += transcription + ' '
    except KeyboardInterrupt:
        print('Stopping...')
        with open('log.txt', 'w') as log_file:
            log_file.write(accumulated_transcription)
    finally:
        stream.stop_stream()
        stream.close()
        p.terminate()

if __name__ == "__main__":
    main()

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request
    help wanted: The issuer has requested some help
    unrelated: A question loosely related to the library

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests
