|
import streamlit as st |
|
import pyaudio |
|
import wave |
|
import whisper |
|
import openai |
|
import tempfile |
|
import os |
|
from gtts import gTTS |
|
|
|
|
|
# Configure the OpenAI client library with a Groq API key from the environment.
# NOTE(review): only the API key is set here — the openai library's default
# base URL still points at api.openai.com, so a Groq key would fail auth
# against that endpoint; confirm whether openai.api_base should be pointed
# at Groq's OpenAI-compatible endpoint.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")


# Load the Whisper speech-to-text model once at import time ("base" size).
# This download/load happens on every fresh Streamlit process start.
model = whisper.load_model("base")
|
|
|
|
|
def record_audio(filename="recorded.wav", duration=5, rate=44100, channels=1, chunk=1024):
    """Record audio from the default input device and save it as a WAV file.

    Parameters
    ----------
    filename : str
        Path of the WAV file to write.
    duration : int | float
        Recording length in seconds.
    rate : int
        Sample rate in Hz (default 44.1 kHz).
    channels : int
        Number of input channels (default mono).
    chunk : int
        Frames read from the device per buffer.
    """
    p = pyaudio.PyAudio()
    # Look up the sample width BEFORE terminate(): the original queried the
    # PyAudio instance after it had already been terminated.
    sample_width = p.get_sample_size(pyaudio.paInt16)
    stream = p.open(format=pyaudio.paInt16,
                    channels=channels,
                    rate=rate,
                    input=True,
                    frames_per_buffer=chunk)
    try:
        # Read enough chunks to cover `duration` seconds of audio.
        frames = [stream.read(chunk) for _ in range(int(rate / chunk * duration))]
    finally:
        # Always release the audio device, even if a read fails mid-recording.
        stream.stop_stream()
        stream.close()
        p.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(channels)
        wf.setsampwidth(sample_width)
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
|
|
|
|
|
st.title("🎙️ Voice-to-Voice Conversational App")
st.info("🎤 Click the button to record your voice!")

if st.button("Record"):
    # 1. Capture microphone audio to a WAV file.
    with st.spinner("Recording..."):
        record_audio("user_input.wav")
    st.success("Recording finished!")

    # 2. Speech-to-text. The pipeline below runs only inside the button
    #    branch, so a Streamlit rerun never tries to transcribe a file
    #    that has not been recorded yet.
    st.info("Transcribing...")
    result = model.transcribe("user_input.wav")
    user_input = result["text"]
    st.success(f"You said: {user_input}")

    # 3. Generate a reply with the chat model.
    st.info("Thinking...")
    response = openai.ChatCompletion.create(
        # NOTE(review): the key above comes from GROQ_API_KEY, but Groq does
        # not serve "gpt-3.5-turbo" — confirm the intended model id.
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": user_input}]
    )
    answer = response['choices'][0]['message']['content']
    st.success(f"AI says: {answer}")

    # 4. Text-to-speech: synthesize the reply and play it in the browser.
    tts = gTTS(answer)
    tts.save("response.mp3")
    st.audio("response.mp3", format="audio/mp3")
|
|