import os
import queue
import tempfile

import streamlit as st
import whisper
from gtts import gTTS
from openai import OpenAI
from pydub import AudioSegment
from streamlit_webrtc import WebRtcMode, webrtc_streamer

# Load the Whisper speech-to-text model once at startup
model = whisper.load_model("base")

# Groq exposes an OpenAI-compatible endpoint; point the v1 client at it
client = OpenAI(
    api_key=os.getenv("GROQ_API_KEY", "your-groq-api-key"),
    base_url="https://api.groq.com/openai/v1",
)

st.title("🎙️ Voice-to-Voice Conversational App")

# Record audio in the browser with streamlit-webrtc; recent versions accept
# the media constraints and RTC configuration as direct keyword arguments
st.info("🎤 Please record your question below:")

webrtc_ctx = webrtc_streamer(
    key="speech",
    mode=WebRtcMode.SENDONLY,  # we only need audio flowing from the browser
    media_stream_constraints={"audio": True, "video": False},
    rtc_configuration={"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]},
    audio_receiver_size=1024,
)

# Accumulate decoded audio across Streamlit reruns
if "audio_buffer" not in st.session_state:
    st.session_state.audio_buffer = AudioSegment.empty()

if webrtc_ctx.audio_receiver:
    try:
        audio_frames = webrtc_ctx.audio_receiver.get_frames(timeout=1)
    except queue.Empty:
        audio_frames = []
    for frame in audio_frames:
        # Wrap the raw PCM in an AudioSegment so the recording can later be
        # exported as a well-formed WAV file (raw bytes carry no WAV header)
        st.session_state.audio_buffer += AudioSegment(
            data=frame.to_ndarray().tobytes(),
            sample_width=frame.format.bytes,
            frame_rate=frame.sample_rate,
            channels=len(frame.layout.channels),
        )

if st.button("🛑 Process Voice"):
    if len(st.session_state.audio_buffer) == 0:
        st.warning("No audio captured yet. Start the recorder above first.")
    else:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
            audio_path = f.name
        st.session_state.audio_buffer.export(audio_path, format="wav")
        st.session_state.audio_buffer = AudioSegment.empty()  # reset for the next take
        st.audio(audio_path)

        st.info("Transcribing with Whisper...")
        result = model.transcribe(audio_path)
        user_text = result["text"]
        st.success(f"You said: {user_text}")

        st.info("Generating AI response...")
        response = client.chat.completions.create(
            model="mixtral-8x7b-32768",
            messages=[{"role": "user", "content": user_text}],
        )
        reply = response.choices[0].message.content
        st.success(f"AI says: {reply}")

        # Convert the reply to speech with gTTS and play it back
        tts = gTTS(reply)
        tts_path = "reply.mp3"
        tts.save(tts_path)
        st.audio(tts_path, format="audio/mp3")
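
# ---------------------------------------------------------------------------
# Usage sketch, assuming the script above is saved as app.py and that ffmpeg
# is available on the PATH (both pydub and Whisper rely on it for decoding):
#
#   pip install streamlit streamlit-webrtc openai-whisper openai gTTS pydub
#   export GROQ_API_KEY=...        # your Groq API key
#   streamlit run app.py
#
# The "base" Whisper model downloads on first run; larger models improve
# transcription accuracy at the cost of speed and memory.
# ---------------------------------------------------------------------------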