import os
import queue
import tempfile

import streamlit as st
import whisper
from gtts import gTTS
from openai import OpenAI
from pydub import AudioSegment
from streamlit_webrtc import WebRtcMode, webrtc_streamer

# Load the Whisper speech-to-text model once at startup
model = whisper.load_model("base")

# Groq exposes an OpenAI-compatible endpoint; point the v1 client at it
client = OpenAI(
    api_key=os.getenv("GROQ_API_KEY", "your-groq-api-key"),
    base_url="https://api.groq.com/openai/v1",
)

st.title("🎙️ Voice-to-Voice Conversational App")

# Record audio in the browser with streamlit-webrtc; recent versions accept
# the media constraints and RTC configuration as direct keyword arguments
st.info("🎤 Please record your question below:")

webrtc_ctx = webrtc_streamer(
    key="speech",
    mode=WebRtcMode.SENDONLY,  # we only need audio flowing from the browser
    media_stream_constraints={"audio": True, "video": False},
    rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
    audio_receiver_size=1024,
)

# Accumulate decoded audio across Streamlit reruns
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = AudioSegment.empty()

if webrtc_ctx.audio_receiver:
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except queue.Empty:
        audio_frames = []
    for frame in audio_frames:
        # Wrap the raw PCM in an AudioSegment so the recording can later be
        # exported as a well-formed WAV file (raw bytes carry no WAV header)
        st.session_state.audio_buffer += AudioSegment(
            data=frame.to_ndarray().tobytes(),
            sample_width=frame.format.bytes,
            frame_rate=frame.sample_rate,
            channels=len(frame.layout.channels),
        )

if st.button("🛑 Process Voice"):
    if len(st.session_state.audio_buffer) == 0:
        st.warning("No audio captured yet. Start the recorder above first.")
    else:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            audio_path = f.name
        st.session_state.audio_buffer.export(audio_path, format="wav")
        st.session_state.audio_buffer = AudioSegment.empty()  # reset for the next take
        st.audio(audio_path)

        st.info("Transcribing with Whisper...")
        result = model.transcribe(audio_path)
        user_text = result["text"]
        st.success(f"You said: {user_text}")

        st.info("Generating AI response...")
        response = client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[{"role": "user", "content": user_text}],
        )
        reply = response.choices[0].message.content
        st.success(f"AI says: {reply}")

        # Convert the reply to speech with gTTS and play it back
        tts = gTTS(reply)
        tts_path = "reply.mp3"
        tts.save(tts_path)
        st.audio(tts_path, format="audio/mp3")
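
# ---------------------------------------------------------------------------
# Usage sketch, assuming the script above is saved as app.py and that ffmpeg
# is available on the PATH (both pydub and Whisper rely on it for decoding):
#
#   pip install streamlit streamlit-webrtc openai-whisper openai gTTS pydub
#   export GROQ_API_KEY=...        # your Groq API key
#   streamlit run app.py
#
# The "base" Whisper model downloads on first run; larger models improve
# transcription accuracy at the cost of speed and memory.
# ---------------------------------------------------------------------------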