File size: 1,931 Bytes
9b07f04 ab48168 0a3696b b46560b b67ca14 9b07f04 b46560b 5bb85a8 b46560b b67ca14 ab48168 b20e1c3 ab48168 b20e1c3 ab48168 0a3696b ab48168 0a3696b ab48168 b20e1c3 ab48168 b46560b b67ca14 ab48168 b46560b 0a3696b b46560b ab48168 b46560b 0a3696b ab48168 b46560b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import io
import os
import queue
import tempfile
import wave

import av
import openai
import streamlit as st
import whisper
from gtts import gTTS
from pydub import AudioSegment
from streamlit_webrtc import ClientSettings, WebRtcMode, webrtc_streamer
# Load the Whisper speech-to-text model ("base" checkpoint).
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# model is loaded again on each rerun. Consider caching it (for example with
# st.cache_resource) if the installed Streamlit version provides that API.
model = whisper.load_model("base")

# Configure the legacy (<1.0) OpenAI SDK to talk to Groq's OpenAI-compatible API.
# The key is read from GROQ_API_KEY; the placeholder default is not a valid
# credential, so the environment variable must be set for requests to succeed.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
# Without an explicit base URL the SDK would send the Groq key (and the
# Groq-hosted model name used later in this script) to api.openai.com, which
# rejects both. Point the client at Groq's endpoint instead.
openai.api_base = "https://api.groq.com/openai/v1"
# Page header. The emoji were mis-decoded (mojibake) in the original literals
# and are restored here.
st.title("🎙️ Voice-to-Voice Conversational App")

# Record audio using streamlit-webrtc
st.info("🎤 Please record your question below:")
audio_placeholder = st.empty()

# Audio-only capture widget. Frames are pulled from `webrtc_ctx.audio_receiver`
# further down in the script.
webrtc_ctx = webrtc_streamer(
    key="speech",
    # NOTE(review): SENDONLY replaces the original SENDRECV. streamlit-webrtc
    # appears to create `audio_receiver` only for send-only streams, and
    # SENDRECV additionally plays the microphone back to the user. Confirm
    # against the installed streamlit-webrtc version.
    mode=WebRtcMode.SENDONLY,
    client_settings=ClientSettings(
        media_stream_constraints={"audio": True, "video": False},
        rtc_configuration={
            "iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}],
        },
    ),
    # Maximum number of audio frames buffered by the receiver queue.
    audio_receiver_size=1024,
)
def _pcm_to_wav_bytes(pcm_bytes, audio_format):
    """Wrap raw PCM samples in a WAV container and return the file bytes.

    `audio_format` is a dict with "channels", "sample_width" (bytes per
    sample) and "frame_rate" (Hz) describing `pcm_bytes`.
    """
    wav_buffer = io.BytesIO()
    with wave.open(wav_buffer, "wb") as wav_out:
        wav_out.setnchannels(audio_format["channels"])
        wav_out.setsampwidth(audio_format["sample_width"])
        wav_out.setframerate(audio_format["frame_rate"])
        wav_out.writeframes(pcm_bytes)
    return wav_buffer.getvalue()


# Raw PCM captured so far. It lives in session state because Streamlit re-runs
# the whole script on every interaction.
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = b""

if webrtc_ctx.audio_receiver:
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except queue.Empty:
        # No frame arrived within the timeout; treat it as "nothing new"
        # instead of crashing the script.
        audio_frames = []
    if audio_frames:
        # Remember the PCM layout so a correct WAV header can be written later.
        # assumes every frame shares the first frame's layout and uses a packed
        # (interleaved) sample format — TODO confirm
        first_frame = audio_frames[0]
        st.session_state.audio_format = {
            "channels": len(first_frame.layout.channels),
            "sample_width": first_frame.format.bytes,
            "frame_rate": first_frame.sample_rate,
        }
        # Join once instead of growing the bytes object frame by frame.
        st.session_state.audio_buffer += b"".join(
            frame.to_ndarray().tobytes() for frame in audio_frames
        )

# NOTE(review): the leading character of this label looks like a mis-decoded
# emoji whose original code point cannot be recovered; left unchanged.
if st.button("π Process Voice"):
    pcm_bytes = st.session_state.audio_buffer
    audio_format = st.session_state.get("audio_format")

    if not pcm_bytes or audio_format is None:
        # Nothing recorded: transcribing an empty file would only raise.
        st.warning("No audio has been captured yet. Please record your question first.")
    else:
        # The buffer holds headerless PCM; add a WAV header so the audio
        # player and Whisper's decoder can read it.
        wav_bytes = _pcm_to_wav_bytes(pcm_bytes, audio_format)

        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            f.write(wav_bytes)
            audio_path = f.name

        try:
            # Pass bytes (not the path) so playback does not depend on the
            # temporary file outliving this run.
            st.audio(wav_bytes, format="audio/wav")

            st.info("Transcribing with Whisper...")
            result = model.transcribe(audio_path)
        finally:
            # delete=False means the file is not cleaned up automatically.
            os.remove(audio_path)

        # Start the next question from a clean buffer.
        st.session_state.audio_buffer = b""

        user_text = result["text"]
        st.success(f"You said: {user_text}")

        st.info("Generating AI response...")
        response = openai.ChatCompletion.create(
            model="mixtral-8x7b-32768",
            messages=[{"role": "user", "content": user_text}],
        )
        reply = response['choices'][0]['message']['content']
        st.success(f"AI says: {reply}")

        # Convert to speech. Synthesize into memory instead of a fixed
        # "reply.mp3" in the working directory, which concurrent sessions
        # would overwrite.
        tts = gTTS(reply)
        mp3_buffer = io.BytesIO()
        tts.write_to_fp(mp3_buffer)
        st.audio(mp3_buffer.getvalue(), format="audio/mp3")
|