# app.py — Voice-to-Voice Conversational App (Streamlit)
import streamlit as st
import pyaudio
import wave
import whisper
import openai
import tempfile
import os
from gtts import gTTS
# Set OpenAI API Key
# NOTE(review): the key is read from GROQ_API_KEY, but the openai client is
# never pointed at Groq's endpoint (no openai.api_base override visible), so
# the ChatCompletion call below targets api.openai.com with a Groq key —
# confirm which provider is intended.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
# Load Whisper model
# Loaded once at module import; "base" trades accuracy for speed/memory.
model = whisper.load_model("base")
# Function to record audio
def record_audio(filename="recorded.wav", duration=5, rate=44100, chunk=1024):
    """Record mono 16-bit audio from the default input device into a WAV file.

    Args:
        filename: Path of the output WAV file (overwritten if it exists).
        duration: Recording length in seconds.
        rate: Sample rate in Hz (new parameter; default keeps the original 44100).
        chunk: Frames per buffer read (new parameter; default keeps the original 1024).
    """
    p = pyaudio.PyAudio()
    # Query the sample width while the PyAudio instance is alive; the original
    # called this after p.terminate(), which relies on undefined post-teardown
    # behavior.
    sample_width = p.get_sample_size(pyaudio.paInt16)
    stream = p.open(format=pyaudio.paInt16,
                    channels=1,
                    rate=rate,
                    input=True,
                    frames_per_buffer=chunk)
    frames = []
    try:
        # Read enough chunks to cover `duration` seconds of audio.
        for _ in range(int(rate / chunk * duration)):
            frames.append(stream.read(chunk))
    finally:
        # Always release the audio device, even if a read fails mid-recording
        # (the original leaked the stream/device on any read error).
        stream.stop_stream()
        stream.close()
        p.terminate()
    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(sample_width)  # 2 bytes for paInt16
        wf.setframerate(rate)
        wf.writeframes(b''.join(frames))
# Streamlit app UI
st.title("πŸŽ™οΈ Voice-to-Voice Conversational App")
st.info("🎀 Click the button to record your voice!")
# One-shot pipeline per button press: record -> transcribe -> chat -> speak.
if st.button("Record"):
    with st.spinner("Recording..."):
        # Writes user_input.wav into the working directory (never cleaned up;
        # `tempfile` is imported at the top of the file but unused).
        record_audio("user_input.wav")
    st.success("Recording finished!")
    # Transcribing with Whisper
    st.info("Transcribing...")
    result = model.transcribe("user_input.wav")
    user_input = result["text"]
    st.success(f"You said: {user_input}")
    # AI response with OpenAI
    # NOTE(review): model "gpt-3.5-turbo" is requested while the API key above
    # comes from GROQ_API_KEY; without an openai.api_base redirect to Groq's
    # OpenAI-compatible endpoint this call will be rejected — confirm provider.
    # No error handling: any API failure surfaces as a raw Streamlit exception.
    st.info("Thinking...")
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": user_input}]
    )
    answer = response['choices'][0]['message']['content']
    st.success(f"AI says: {answer}")
    # Convert AI response to speech
    # gTTS requires network access; response.mp3 is also left on disk.
    tts = gTTS(answer)
    tts.save("response.mp3")
    st.audio("response.mp3", format="audio/mp3")