File size: 2,180 Bytes
0ec1227
 
bef01d9
0ec1227
 
 
 
 
ff8e2d1
0ec1227
 
ff8e2d1
0ec1227
 
ff8e2d1
 
0ec1227
ff8e2d1
0ec1227
 
 
ff8e2d1
 
0ec1227
ff8e2d1
 
0ec1227
ff8e2d1
0ec1227
ff8e2d1
0ec1227
ff8e2d1
 
0ec1227
 
 
 
 
 
ff8e2d1
 
0ec1227
 
ff8e2d1
0ec1227
ff8e2d1
 
0ec1227
 
 
 
 
ff8e2d1
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import streamlit as st
from utils import save_uploaded_audio
from voice_cloner import clone_and_generate_text
import os
import whisper
import torchaudio
from groq import Groq

# ---- One-time resource initialization --------------------------------------
# Streamlit re-executes this entire script on every user interaction.
# Without caching, the Whisper model would be reloaded from disk on each
# rerun; @st.cache_resource makes each loader run once per server process.

@st.cache_resource
def _load_whisper_model():
    """Load and cache the Whisper 'tiny' model used for transcription."""
    return whisper.load_model("tiny")


@st.cache_resource
def _make_groq_client():
    """Create and cache the Groq LLM client.

    Raises:
        RuntimeError: if GROQ_API_KEY is not set, so the app fails fast with
            a clear message instead of erroring on the first chat request.
    """
    api_key = os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise RuntimeError("GROQ_API_KEY environment variable is not set.")
    return Groq(api_key=api_key)


# Module-level names kept identical so the rest of the script is unchanged.
whisper_model = _load_whisper_model()
groq_client = _make_groq_client()

st.set_page_config(page_title="Voice Chat", layout="centered")
st.title("🎀 Voice Chat using Your Cloned Voice")

# Persist the cloned-voice file path across reruns (Streamlit re-executes
# the script on every widget event, so plain variables would be reset).
if "clone_path" not in st.session_state:
    st.session_state.clone_path = None

st.sidebar.header("Setup Your Clone Voice")
voice_option = st.sidebar.radio("Choose how to provide clone voice", ["Upload Voice", "Record Voice"])

if voice_option == "Upload Voice":
    uploaded = st.sidebar.file_uploader("Upload a voice sample", type=["wav", "mp3"])
    if uploaded:
        # Save the sample under a fixed name; this path is what the cloner
        # uses as the reference voice for all generated replies.
        path = save_uploaded_audio(uploaded, "reference_voice.wav")
        st.session_state.clone_path = path
        st.success("Voice uploaded and saved as your clone voice.")
else:
    # FIX: "Record Voice" previously had no handling at all, leaving the
    # option silently dead. Tell the user explicitly that it is unavailable.
    st.sidebar.warning("Recording is not implemented yet — please use 'Upload Voice'.")

# --- Main conversation section ---
# Pipeline: uploaded audio -> Whisper transcript -> Groq LLM reply ->
# (optionally) speech synthesized in the user's cloned voice.
st.subheader("πŸ—£οΈ Ask something using your voice")

user_voice = st.file_uploader("Upload your voice question", type=["wav", "mp3"])
if user_voice:
    user_voice_path = save_uploaded_audio(user_voice, "user_question.wav")
    st.audio(user_voice_path)

    # Step 1: Transcribe the question with Whisper.
    st.info("Transcribing...")
    result = whisper_model.transcribe(user_voice_path)
    user_text = result["text"].strip()

    if not user_text:
        # FIX: silent or empty audio yields an empty transcript; previously
        # this was sent to the LLM as an empty prompt. Guard instead.
        st.warning("No speech was detected in the recording — please try again.")
    else:
        st.success(f"πŸ“ You said: {user_text}")

        # Step 2: Generate the LLM response via Groq (single-turn, no history).
        st.info("Thinking...")
        response = groq_client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": user_text}]
        )
        reply = response.choices[0].message.content
        st.success(f"πŸ€– AI says: {reply}")

        # Step 3: Speak the reply back using the cloned reference voice, if
        # one has been set up in the sidebar.
        if st.session_state.clone_path:
            st.info("Generating voice reply using your cloned voice...")
            voice_output_path = clone_and_generate_text(reply, st.session_state.clone_path)
            st.audio(voice_output_path)
        else:
            st.warning("Please upload your clone voice first in the sidebar.")