import os

import streamlit as st
import whisper
from groq import Groq

from utils import save_uploaded_audio
from voice_cloner import clone_and_generate_text

# Load the Whisper model for transcription once and cache it across Streamlit reruns
@st.cache_resource
def load_whisper_model():
    return whisper.load_model("tiny")

whisper_model = load_whisper_model()

# Initialize the Groq LLM client (expects GROQ_API_KEY in the environment)
groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
st.set_page_config(page_title="Voice Chat", layout="centered")
st.title("🎤💬 Voice & Text Chat using Your Cloned Voice")

# Store the cloned voice path across reruns
if "clone_path" not in st.session_state:
    st.session_state.clone_path = None

st.sidebar.header("🧬 Setup Your Clone Voice")
voice_option = st.sidebar.radio("Choose how to provide clone voice", ["Upload Voice"])

if voice_option == "Upload Voice":
    uploaded = st.sidebar.file_uploader("Upload a voice sample", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if uploaded:
        path = save_uploaded_audio(uploaded, "reference_voice.wav")
        st.session_state.clone_path = path
        st.success("✅ Voice uploaded and saved as your clone voice.")

# --- Conversation section ---
st.subheader("🗣️ Ask with voice or type text below")
tab1, tab2 = st.tabs(["🎤 Voice Input", "💬 Text Input"])
# --- VOICE INPUT TAB ---
with tab1:
    user_voice = st.file_uploader("Upload your voice question", type=["wav", "mp3", "m4a", "flac", "ogg"])
    if user_voice:
        user_voice_path = save_uploaded_audio(user_voice, "user_question.wav")
        st.audio(user_voice_path)

        # Step 1: Transcribe voice
        st.info("Transcribing...")
        result = whisper_model.transcribe(user_voice_path)
        user_text = result["text"]
        st.success(f"📝 You said: {user_text}")

        # Step 2: Get LLM response
        st.info("Thinking...")
        response = groq_client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": user_text}]
        )
        reply = response.choices[0].message.content
        st.success(f"🤖 AI says: {reply}")

        # Step 3: Voice reply
        if st.session_state.clone_path:
            st.info("Cloning voice reply...")
            voice_output_path = clone_and_generate_text(reply, st.session_state.clone_path)
            st.audio(voice_output_path)
        else:
            st.warning("Upload your voice clone first in the sidebar.")
# --- TEXT INPUT TAB ---
with tab2:
    user_input = st.text_input("Type your question here:")
    if st.button("Send Text"):
        if user_input.strip() == "":
            st.warning("Please enter a message.")
        else:
            # Step 1: Get LLM response
            st.info("Thinking...")
            response = groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": user_input}]
            )
            reply = response.choices[0].message.content
            st.success(f"🤖 AI says: {reply}")

            # Step 2: Voice reply
            if st.session_state.clone_path:
                st.info("Cloning voice reply...")
                voice_output_path = clone_and_generate_text(reply, st.session_state.clone_path)
                st.audio(voice_output_path)
            else:
                st.warning("Upload your voice clone first in the sidebar.")
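The `save_uploaded_audio` helper imported from `utils` is not shown above. A minimal sketch of what it presumably does, based on how `app.py` calls it (write the uploaded bytes to disk under the given filename and return the path), could look like this; the `uploads` directory is an assumption:

# utils.py -- sketch of the assumed helper, not the Space's actual implementation
import os

UPLOAD_DIR = "uploads"  # assumed location; adjust to your project layout

def save_uploaded_audio(uploaded_file, filename: str) -> str:
    """Write a Streamlit UploadedFile to disk and return its path.

    Note: the file is saved under `filename` as-is, even if the upload
    was mp3/m4a/etc., mirroring how app.py names the outputs.
    """
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    path = os.path.join(UPLOAD_DIR, filename)
    with open(path, "wb") as f:
        f.write(uploaded_file.getbuffer())
    return path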
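`clone_and_generate_text` from `voice_cloner` is likewise not shown. One way such a module could be backed is with Coqui TTS's XTTS v2 model, which does zero-shot voice cloning from a short reference clip; this is purely an assumption about the backend, and the output filename and language are placeholders:

# voice_cloner.py -- hypothetical sketch assuming Coqui TTS (XTTS v2)
from TTS.api import TTS

# Loading the model is slow, so do it once at import time.
_tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")

def clone_and_generate_text(text: str, reference_voice_path: str,
                            output_path: str = "cloned_reply.wav") -> str:
    """Synthesize `text` in the voice of the reference clip and return the output path."""
    _tts.tts_to_file(
        text=text,
        speaker_wav=reference_voice_path,
        language="en",  # assumed; XTTS supports several languages
        file_path=output_path,
    )
    return output_path

To run the app locally, set `GROQ_API_KEY` in the environment, make sure ffmpeg is on the path (Whisper uses it to decode uploads), and launch with `streamlit run app.py`.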