Spaces:
Sleeping
Sleeping
File size: 3,321 Bytes
0ec1227 bef01d9 0ec1227 ff8e2d1 0ec1227 ff8e2d1 0ec1227 ff8e2d1 b2cade9 0ec1227 ff8e2d1 0ec1227 b2cade9 0ec1227 ff8e2d1 bd25b7f 0ec1227 ff8e2d1 0ec1227 b2cade9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import streamlit as st
from utils import save_uploaded_audio
from voice_cloner import clone_and_generate_text
import os
import whisper
import torchaudio
from groq import Groq
# --- Model / client initialization ---
# A Streamlit script re-executes top-to-bottom on every widget interaction.
# Loading Whisper (and rebuilding the Groq client) at module level without
# caching would redo that work on every rerun; @st.cache_resource keeps one
# shared instance alive for the app's lifetime.
@st.cache_resource
def _load_whisper_model():
    """Load and cache the small CPU-friendly Whisper 'tiny' model."""
    return whisper.load_model("tiny")


@st.cache_resource
def _create_groq_client():
    """Create and cache the Groq LLM client (reads GROQ_API_KEY from env)."""
    return Groq(api_key=os.environ.get("GROQ_API_KEY"))


whisper_model = _load_whisper_model()
groq_client = _create_groq_client()
# --- Page chrome and session setup ---
st.set_page_config(page_title="Voice Chat", layout="centered")
st.title("π€π¬ Voice & Text Chat using Your Cloned Voice")

# Persist the reference-voice file path across Streamlit reruns.
if "clone_path" not in st.session_state:
    st.session_state["clone_path"] = None

# Sidebar: pick how the clone voice is supplied (single mode for now).
st.sidebar.header("𧬠Setup Your Clone Voice")
voice_option = st.sidebar.radio(
    "Choose how to provide clone voice",
    ["Upload Voice"],
)
# --- Clone-voice upload path ---
# Save the uploaded sample to disk via the project helper and remember its
# path in session state so later reruns can reuse it for voice cloning.
if voice_option == "Upload Voice":
    uploaded = st.sidebar.file_uploader(
        "Upload a voice sample",
        type=["wav", "mp3", "m4a", "flac", "ogg"],
    )
    if uploaded:
        path = save_uploaded_audio(uploaded, "reference_voice.wav")
        st.session_state.clone_path = path
        # FIX: the original message literal was split across two physical
        # lines (a syntax error) with a mis-encoded check mark; rejoined
        # into one valid string literal.
        st.success("✅ Voice uploaded and saved as your clone voice.")
# --- Conversation section ---
st.subheader("π£οΈ Ask with voice or type text below")
tab1, tab2 = st.tabs(["π€ Voice Input", "π¬ Text Input"])

# --- VOICE INPUT TAB ---
# Pipeline: uploaded audio -> Whisper transcript -> Groq LLM answer ->
# (optionally) cloned-voice audio of the answer.
with tab1:
    question_audio = st.file_uploader(
        "Upload your voice question",
        type=["wav", "mp3", "m4a", "flac", "ogg"],
    )
    if question_audio:
        question_path = save_uploaded_audio(question_audio, "user_question.wav")
        st.audio(question_path)

        # Step 1: speech -> text via the cached Whisper model.
        st.info("Transcribing...")
        transcription = whisper_model.transcribe(question_path)
        asked_text = transcription["text"]
        st.success(f"π You said: {asked_text}")

        # Step 2: send the transcript to the LLM for an answer.
        st.info("Thinking...")
        llm_response = groq_client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": asked_text}],
        )
        answer = llm_response.choices[0].message.content
        st.success(f"π€ AI says: {answer}")

        # Step 3: speak the answer in the user's cloned voice, if one is set.
        if st.session_state.clone_path:
            st.info("Cloning voice reply...")
            cloned_audio = clone_and_generate_text(
                answer, st.session_state.clone_path
            )
            st.audio(cloned_audio)
        else:
            st.warning("Upload your voice clone first in the sidebar.")
# --- TEXT INPUT TAB ---
# Same answer/clone pipeline as the voice tab, minus the transcription step.
with tab2:
    typed_question = st.text_input("Type your question here:")
    if st.button("Send Text"):
        if not typed_question.strip():
            st.warning("Please enter a message.")
        else:
            # Step 1: ask the LLM for an answer to the typed question.
            st.info("Thinking...")
            llm_response = groq_client.chat.completions.create(
                model="llama3-8b-8192",
                messages=[{"role": "user", "content": typed_question}],
            )
            answer = llm_response.choices[0].message.content
            st.success(f"π€ AI says: {answer}")

            # Step 2: speak the answer in the cloned voice when available.
            if st.session_state.clone_path:
                st.info("Cloning voice reply...")
                cloned_audio = clone_and_generate_text(
                    answer, st.session_state.clone_path
                )
                st.audio(cloned_audio)
            else:
                st.warning("Upload your voice clone first in the sidebar.")
|