import os
import tempfile

import openai
import streamlit as st
import whisper
from gtts import gTTS
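
# Voice-to-voice pipeline: upload audio -> Whisper transcription -> LLM
# reply via Groq's OpenAI-compatible API -> gTTS speech synthesis.
# Assumes the legacy openai<1.0 SDK and ffmpeg on PATH (Whisper shells
# out to it to decode audio). Run with: streamlit run <this_file>.py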

# Point the openai client at Groq's OpenAI-compatible endpoint; without
# this, the Groq key would be sent to api.openai.com and rejected
openai.api_base = "https://api.groq.com/openai/v1"
openai.api_key = os.getenv("GROQ_API_KEY", "your-groq-api-key")

# Cache the Whisper model so Streamlit reruns don't reload it from disk
@st.cache_resource
def load_whisper_model():
    return whisper.load_model("base")

model = load_whisper_model()
st.title("ποΈ Voice-to-Voice Conversational App") |
|
|
|
|
|

uploaded_file = st.file_uploader("Upload your voice message (MP3/WAV)", type=["mp3", "wav"])

if uploaded_file:
    # Persist the upload to a temp file, keeping its extension so the
    # decoder sees the right format
    suffix = os.path.splitext(uploaded_file.name)[1] or ".wav"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_file.read())
        tmp_path = tmp.name
st.info("Transcribing...") |
|
result = model.transcribe(tmp_path) |
|
user_text = result["text"] |
|
st.success(f"You said: {user_text}") |
|
|
|
|
|
st.info("Thinking...") |
|
response = openai.ChatCompletion.create( |
|
model="mixtral-8x7b-32768", |
|
messages=[{"role": "user", "content": user_text}] |
|
) |
|
reply_text = response["choices"][0]["message"]["content"] |
|
st.success(f"AI says: {reply_text}") |

    # Text-to-speech with gTTS, played back via Streamlit's audio widget
    tts = gTTS(reply_text)
    tts_path = "response.mp3"
    tts.save(tts_path)
    st.audio(tts_path, format="audio/mp3")

    # Clean up the temporary upload (response.mp3 is overwritten on the
    # next run)
    os.remove(tmp_path)