File size: 1,931 Bytes
9b07f04
ab48168
 
0a3696b
b46560b
b67ca14
9b07f04
b46560b
 
5bb85a8
b46560b
 
b67ca14
ab48168
 
 
 
 
 
 
b20e1c3
ab48168
b20e1c3
ab48168
 
 
 
 
 
 
 
 
0a3696b
ab48168
 
0a3696b
ab48168
 
 
 
b20e1c3
ab48168
 
 
 
 
 
 
 
 
b46560b
b67ca14
 
ab48168
b46560b
0a3696b
b46560b
 
ab48168
 
b46560b
0a3696b
ab48168
 
b46560b
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import queue
import tempfile
import wave

import av
import openai
import streamlit as st
import whisper
from gtts import gTTS
from pydub import AudioSegment
from streamlit_webrtc import webrtc_streamer, WebRtcMode, ClientSettings

# ---------------------------------------------------------------------------
# Voice-to-voice assistant: record speech in the browser (streamlit-webrtc),
# transcribe it with Whisper, answer via a Groq-hosted chat model, and speak
# the reply back with gTTS.
# ---------------------------------------------------------------------------


@st.cache_resource
def _load_whisper_model():
    """Load the Whisper model once per server process, not on every rerun."""
    return whisper.load_model("base")


model = _load_whisper_model()

# Groq exposes an OpenAI-compatible API, but the client must be pointed at
# Groq's endpoint explicitly -- setting only the key would still send the
# request to api.openai.com.
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")
openai.api_base = "https://api.groq.com/openai/v1"

st.title("🎙️ Voice-to-Voice Conversational App")

# Record audio using streamlit-webrtc
st.info("🎤 Please record your question below:")

audio_placeholder = st.empty()

webrtc_ctx = webrtc_streamer(
    key="speech",
    mode=WebRtcMode.SENDRECV,
    client_settings=ClientSettings(
        media_stream_constraints={"audio": True, "video": False},
        rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
    ),
    audio_receiver_size=1024,
)

# Accumulate raw PCM across Streamlit reruns.  Also remember the stream
# format (rate/channels) so a valid WAV header can be written later.
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = b""
    st.session_state.sample_rate = 48000  # WebRTC default; updated per frame
    st.session_state.channels = 1

if webrtc_ctx.audio_receiver:
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except queue.Empty:
        # No audio arrived within the timeout; don't crash the rerun.
        audio_frames = []
    for frame in audio_frames:
        st.session_state.sample_rate = frame.sample_rate
        st.session_state.channels = len(frame.layout.channels)
        st.session_state.audio_buffer += frame.to_ndarray().tobytes()

if st.button("🛑 Process Voice"):
    if not st.session_state.audio_buffer:
        st.warning("No audio captured yet — please record something first.")
    else:
        # BUG FIX: the original wrote headerless PCM bytes into a ".wav"
        # file, which is not a valid WAV and Whisper/ffmpeg cannot decode.
        # Write a proper RIFF/WAV file with the captured stream parameters.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            audio_path = f.name
        with wave.open(audio_path, "wb") as wf:
            wf.setnchannels(st.session_state.channels)
            wf.setsampwidth(2)  # streamlit-webrtc delivers 16-bit (s16) PCM
            wf.setframerate(st.session_state.sample_rate)
            wf.writeframes(st.session_state.audio_buffer)

        st.audio(audio_path)

        st.info("Transcribing with Whisper...")
        result = model.transcribe(audio_path)
        user_text = result["text"]
        st.success(f"You said: {user_text}")

        st.info("Generating AI response...")
        response = openai.ChatCompletion.create(
            model="mixtral-8x7b-32768",
            messages=[{"role": "user", "content": user_text}]
        )
        reply = response['choices'][0]['message']['content']
        st.success(f"AI says: {reply}")

        # Convert the reply to speech and play it back.
        tts = gTTS(reply)
        tts_path = "reply.mp3"
        tts.save(tts_path)
        st.audio(tts_path, format="audio/mp3")

        # Reset so the next question starts from a clean buffer, and drop
        # the temporary WAV now that transcription and playback are done.
        st.session_state.audio_buffer = b""
        os.remove(audio_path)