import os
import tempfile

import outetts
import streamlit as st
from pydub import AudioSegment

# Load the OuteTTS model once and cache it across Streamlit reruns
@st.cache_resource
def load_interface():
    model_config = outetts.HFModelConfig_v1(
        model_path="OuteAI/OuteTTS-0.2-500M",
        language="en",  # Supported languages: en, zh, ja, ko
    )
    return outetts.InterfaceHF(model_version="0.2", cfg=model_config)

interface = load_interface()

# Streamlit UI
st.title("OuteTTS Speech Synthesis")
st.write("Enter text below to generate speech.")

# Sidebar for voice cloning: reference audio and its transcript
st.sidebar.title("Voice Cloning")
reference_audio = st.sidebar.file_uploader("Upload a reference audio file (any common format)", type=["wav", "mp3", "ogg", "flac", "m4a"])
reference_transcript = st.sidebar.text_area("Transcript of the reference audio (used to build the speaker profile):", "")

# Convert any uploaded audio to a temporary WAV file so it can be processed reliably
def convert_to_wav(audio_file):
    temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
    temp_audio.close()  # Close the handle so pydub can write to the path on all platforms
    audio = AudioSegment.from_file(audio_file)
    audio.export(temp_audio.name, format="wav")
    return temp_audio.name

if reference_audio:
    ref_audio_path = convert_to_wav(reference_audio)
else:
    ref_audio_path = None

# Recording functionality
if ref_audio_path is None:
    st.sidebar.write("Or record your voice below:")
    if st.sidebar.button("Record Voice"):
        st.sidebar.warning("Recording functionality not implemented yet. Please upload a file.")

text_input = st.text_area("Text to convert to speech:", "Hello, this is an AI-generated voice.")

if st.button("Generate Speech"):
    with st.spinner("Generating audio..."):
        # Build a speaker profile from the reference audio, if one was provided
        speaker = None
        if ref_audio_path:
            speaker = interface.create_speaker(
                audio_path=ref_audio_path,
                transcript=reference_transcript,
            )

        # Generate speech, cloning the reference voice when a speaker profile exists
        output = interface.generate(
            text=text_input,
            temperature=0.1,
            repetition_penalty=1.1,
            max_length=4096,
            speaker=speaker,
        )

        # Save the synthesized speech to a file
        output_path = "output.wav"
        output.save(output_path)

        # Play the audio in the Streamlit app
        st.audio(output_path, format="audio/wav")
        st.success("Speech generated successfully!")

# Clean up temporary files
if ref_audio_path:
    os.remove(ref_audio_path)
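
# How to run (assumptions: this file is saved as app.py; package names match the imports above):
#   pip install streamlit outetts pydub   # pydub also needs ffmpeg available on the system
#   streamlit run app.py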