Dear Austin, thank you for the YouTube video tutorial.
Nevertheless, I have been struggling for two days now to get gpt4all_voice running offline on my PC (Win10), with no success.
The Python script runs without any error message, records the wake word into wake_detect.wav, and Jarvis tells me 'listening', but then nothing happens.
My voice question is not recorded into prompt.wav and the program hangs. It would be much appreciated if you could take a look at the code. I followed the steps in your video, and I have even solved the issue of the missing vocab.bpe. Still nothing. :( Here is my main.py:
from os import system
import speech_recognition as sr
from playsound import playsound
from gpt4all import GPT4All
import sys
import whisper
import warnings
import time
import os
# Keyword that must appear in a transcription to leave wake-word mode.
wake_word = 'jarvis'

# Language model loaded from an explicit local file.  allow_download=False is
# passed alongside the path (presumably to prevent any download attempt --
# confirm against the GPT4All documentation).
model = GPT4All(
    "/users/apache33/appdata/local/nomic.ai/GPT4All/gpt4all-falcon-newbpe-q4_0.gguf",
    allow_download=False,
)

# Recognizer shared by the ambient-noise calibration and the background
# listener in start_listening().
r = sr.Recognizer()

# Whisper checkpoints: tiny_model transcribes wake-word clips, base_model
# transcribes prompts.  Neither path contains "~", so expanduser() returns
# them unchanged.
tiny_model_path = os.path.expanduser('/users/apache33/.cache/whisper/tiny.pt')
base_model_path = os.path.expanduser('/users/apache33/.cache/whisper/base.pt')
tiny_model = whisper.load_model(tiny_model_path)
base_model = whisper.load_model(base_model_path)

# State flag read by callback(): True routes the next audio clip to
# listen_for_wake_word(), False routes it to prompt_gpt().
listening_for_wake_word = True

# Audio input handed to the recognizer.
source = sr.Microphone()

# Silence UserWarning messages attributed to the whisper.transcribe module.
warnings.filterwarnings("ignore", category=UserWarning, module='whisper.transcribe')

# The pyttsx3 engine is only created off macOS; speak() shells out to `say`
# when sys.platform is 'darwin' and never touches `engine` there.
if sys.platform != 'darwin':
    import pyttsx3
    engine = pyttsx3.init()
def speak(text):
    """Say *text* aloud using the platform's text-to-speech backend.

    On macOS (``sys.platform == 'darwin'``) the text is reduced to a
    whitelist of characters and passed to the ``say`` shell command.  On
    every other platform the module-level pyttsx3 ``engine`` queues the text
    and ``runAndWait()`` is called.

    NOTE(review): in this file speak() is reached from the callback given to
    ``listen_in_background``.  If ``runAndWait()`` does not return when
    called from that context, the caller never gets past this call -- worth
    confirming on Windows when the assistant stalls right after speaking.
    """
    if sys.platform != 'darwin':
        engine.say(text)
        engine.runAndWait()
        return

    # Whitelist excludes quote characters, so the single-quoted shell
    # argument below cannot be terminated by the text itself.
    speakable = set("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789.,?!-_$:+-/ ")
    sanitized = ''.join(filter(speakable.__contains__, text))
    system(f"say '{sanitized}'")
def listen_for_wake_word(audio):
    """Check one captured clip for the wake word and switch to prompt mode.

    The clip's WAV data is written to ``wake_detect.wav`` and transcribed
    with ``tiny_model``.  If ``wake_word`` occurs in the lower-cased
    transcription, a notice is printed, 'Listening' is spoken, and the
    module-level ``listening_for_wake_word`` flag is set to False.  The flag
    is only cleared after ``speak()`` has returned.

    Args:
        audio: object providing ``get_wav_data()`` (the clip passed to
            ``callback``).
    """
    global listening_for_wake_word

    with open("wake_detect.wav", "wb") as wav_file:
        wav_file.write(audio.get_wav_data())

    heard = tiny_model.transcribe('wake_detect.wav')['text'].lower().strip()
    if wake_word not in heard:
        return

    print("Wake word detected. Please speak your prompt to GPT4All.")
    speak('Listening')
    listening_for_wake_word = False
def prompt_gpt(audio):
    """Transcribe a spoken prompt, query GPT4All, and speak the answer.

    The clip's WAV data is written to ``prompt.wav`` and transcribed with
    ``base_model``.  A blank transcription is reported (printed and spoken);
    otherwise the text is sent to ``model.generate`` with ``max_tokens=200``
    and the output is printed and spoken.  Both outcomes set the
    module-level ``listening_for_wake_word`` flag back to True.

    Any exception is caught and printed as "Prompt error"; in that case the
    flag is left at whatever value it had.

    Args:
        audio: object providing ``get_wav_data()`` (the clip passed to
            ``callback``).
    """
    global listening_for_wake_word
    try:
        with open("prompt.wav", "wb") as wav_file:
            wav_file.write(audio.get_wav_data())
        prompt_text = base_model.transcribe('prompt.wav')['text']

        if not prompt_text.strip():
            # Nothing intelligible was captured: tell the user and go back
            # to waiting for the wake word.
            print("Empty prompt. Please speak again.")
            speak("Empty prompt. Please speak again.")
            listening_for_wake_word = True
            return

        print('User: ' + prompt_text)
        output = model.generate(prompt_text, max_tokens=200)
        print('GPT4All: ', output)
        speak(output)
        print('\nSay', wake_word, 'to wake me up. \n')
        listening_for_wake_word = True
    except Exception as e:
        # Catch-all so a failure here is reported instead of propagating
        # out of the listener callback.
        print("Prompt error: ", e)
def callback(recognizer, audio):
    """Route a captured clip to the handler for the current mode.

    While ``listening_for_wake_word`` is true the clip goes to
    ``listen_for_wake_word``; otherwise it goes to ``prompt_gpt``.

    Args:
        recognizer: supplied by the caller; not used here.
        audio: the captured clip, forwarded unchanged to the handler.
    """
    handler = listen_for_wake_word if listening_for_wake_word else prompt_gpt
    handler(audio)
def start_listening():
    """Calibrate the microphone, start background listening, and block.

    The recognizer samples ambient noise from ``source`` for two seconds,
    the wake-word hint is printed, and ``callback`` is registered with
    ``listen_in_background``.  The function then sleeps in one-second steps
    indefinitely and never returns normally.
    """
    with source as mic:
        r.adjust_for_ambient_noise(mic, duration=2)

    print('\nSay', wake_word, 'to wake me up. \n')

    # Started only after the `with` block has released the microphone
    # (presumably because listen_in_background opens `source` itself --
    # confirm in the SpeechRecognition docs).  Its return value is discarded.
    r.listen_in_background(source, callback)

    # Keep the calling thread alive while the listener does the work.
    while True:
        time.sleep(1)
# Start the assistant only when this file is executed as a script.  The
# guard must compare the special module attribute __name__ against
# '__main__'; a bare `name` is undefined and raises NameError.
if __name__ == '__main__':
    start_listening()