import os
import queue
import tempfile

import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode
import whisper
import openai
from gtts import gTTS
from pydub import AudioSegment
# Load the Whisper model once and cache it across Streamlit reruns
@st.cache_resource
def load_whisper_model():
    return whisper.load_model("base")

model = load_whisper_model()
# Point the OpenAI client at Groq's OpenAI-compatible endpoint
openai.api_base = "https://api.groq.com/openai/v1"
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
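# Note: this file uses the pre-1.0 openai client (openai.ChatCompletion /
# openai.api_base); openai>=1.0 would need OpenAI(base_url=..., api_key=...) instead.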
st.title("🎙️ Voice-to-Voice Conversational App")

# Record audio in the browser with streamlit-webrtc
st.info("🎤 Please record your question below:")
webrtc_ctx = webrtc_streamer(
    key="speech",
    mode=WebRtcMode.SENDRECV,
    # ClientSettings is deprecated in recent streamlit-webrtc; pass the
    # media constraints and RTC configuration directly instead.
    media_stream_constraints={"audio": True, "video": False},
    rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
    audio_receiver_size=1024,
)
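# webrtc_ctx.audio_receiver is only set while the browser stream is live;
# each Streamlit rerun below drains whatever frames have queued up.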
if "audio_buffer" not in st.session_state:
st.session_state.audio_buffer = b""
if webrtc_ctx.audio_receiver:
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except queue.Empty:
        audio_frames = []
    for frame in audio_frames:
        # Capture the stream format so the raw PCM can be wrapped in a WAV later
        st.session_state.sample_rate = frame.sample_rate
        st.session_state.channels = len(frame.layout.channels)
        st.session_state.audio_buffer += frame.to_ndarray().tobytes()
if st.button("🛑 Process Voice"):
    # Raw PCM bytes have no WAV header, so wrap them with pydub before
    # handing the file to Whisper (which decodes via ffmpeg). Assumes
    # 16-bit samples; rate/channels were captured from the frames above.
    segment = AudioSegment(
        data=st.session_state.audio_buffer,
        sample_width=2,
        frame_rate=st.session_state.get("sample_rate", 48000),
        channels=st.session_state.get("channels", 2),
    )
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        segment.export(f.name, format="wav")
        audio_path = f.name
    st.audio(audio_path)
st.info("Transcribing with Whisper...")
result = model.transcribe(audio_path)
user_text = result["text"]
st.success(f"You said: {user_text}")
st.info("Generating AI response...")
response = openai.ChatCompletion.create(
model="mixtral-8x7b-32768",
messages=[{"role": "user", "content": user_text}]
)
reply = response['choices'][0]['message']['content']
st.success(f"AI says: {reply}")
    # Convert the reply to speech and play it back
    tts = gTTS(reply)
    tts_path = "reply.mp3"
    tts.save(tts_path)
    st.audio(tts_path, format="audio/mp3")
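    # Clear the buffer and remove the temp WAV so the next question starts clean
    st.session_state.audio_buffer = b""
    os.unlink(audio_path)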