import streamlit as st
from streamlit_webrtc import webrtc_streamer, WebRtcMode
import av  # streamlit-webrtc delivers audio as av.AudioFrame objects
import whisper
import openai
import tempfile
import os
from gtts import gTTS
from pydub import AudioSegment

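# Assumed runtime setup, not spelled out in the original snippet: openai-whisper
# decodes audio through ffmpeg, so ffmpeg must be installed on the host; the
# Groq key is read from the GROQ_API_KEY environment variable; the app itself
# is started with `streamlit run <this file>`.
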
@st.cache_resource
def load_whisper_model():
    # Cache the model so Streamlit reruns do not reload it from disk each time.
    return whisper.load_model("base")

model = load_whisper_model()

# Route the (pre-1.0) openai client to Groq's OpenAI-compatible endpoint;
# without overriding api_base, requests would go to api.openai.com instead.
openai.api_base = "https://api.groq.com/openai/v1"
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")

st.title("🎙️ Voice-to-Voice Conversational App")

st.info("🎤 Please record your question below:")

audio_placeholder = st.empty()

# Capture microphone audio in the browser and stream it to the server.
# Recent streamlit-webrtc releases accept these settings directly rather than
# through the deprecated ClientSettings wrapper.
webrtc_ctx = webrtc_streamer(
    key="speech",
    mode=WebRtcMode.SENDRECV,
    media_stream_constraints={"audio": True, "video": False},
    rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
    audio_receiver_size=1024,
)

if "audio_buffer" not in st.session_state: |
|
st.session_state.audio_buffer = b"" |
|
|
|
if webrtc_ctx.audio_receiver: |
|
audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1) |
|
for frame in audio_frames: |
|
st.session_state.audio_buffer += frame.to_ndarray().tobytes() |
|
|
|
if st.button("Process Voice"):
    # Export the buffered recording as a proper WAV file (pydub writes the
    # header) so Whisper can decode it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        audio_path = f.name
    st.session_state.audio_buffer.export(audio_path, format="wav")

    st.audio(audio_path)

    st.info("Transcribing with Whisper...")
    result = model.transcribe(audio_path)
    user_text = result["text"]
    st.success(f"You said: {user_text}")

st.info("Generating AI response...") |
|
response = openai.ChatCompletion.create( |
|
model="mixtral-8x7b-32768", |
|
messages=[{"role": "user", "content": user_text}] |
|
) |
|
reply = response['choices'][0]['message']['content'] |
|
st.success(f"AI says: {reply}") |
|
|
|
|
|
    # Speak the reply back to the user with gTTS.
    tts = gTTS(reply)
    tts_path = "reply.mp3"
    tts.save(tts_path)
    st.audio(tts_path, format="audio/mp3")