Spaces:
Runtime error
Runtime error
File size: 3,824 Bytes
5ca847f db3663c 5ca847f db3663c 5ca847f db3663c 5ca847f db3663c 5ca847f db3663c 5ca847f db3663c 5ca847f db3663c 5ca847f db3663c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import io
import os
import tempfile
from typing import List
import TTS.api
import torch
from pydub import AudioSegment
import gradio as gr # Gradio库
import config
# Run inference on the GPU when torch reports one, otherwise fall back to CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load every model listed in config.models once at import time, keyed by the
# identifier used in the config. A comprehension keeps the loop variables out
# of the module namespace (the previous loop rebound the builtin name `id`).
models = {
    model_id: TTS.api.TTS(model_name).to(device)
    for model_id, model_name in config.models.items()
}
def _speaker_audio_source(upload):
    """Return an object ``AudioSegment.from_file`` can read for one upload.

    Depending on the Gradio version and the File component's ``type``
    setting, an upload arrives either as a path string or as a temp-file
    object that exposes the path as ``.name``. Reading by path covers both;
    plain file-like objects without a usable path are read into memory.
    """
    if isinstance(upload, (str, os.PathLike)):
        return upload
    path = getattr(upload, "name", None)
    if isinstance(path, str) and os.path.exists(path):
        return path
    return io.BytesIO(upload.read())


def synthesize_tts(
    text: str = 'Hello, World!',
    speaker_wavs: List[gr.File] = None,
    speaker_idx: str = 'Ana Florence',
    language: str = 'ja',
    temperature: float = 0.65,
    length_penalty: float = 1.0,
    repetition_penalty: float = 2.0,
    top_k: int = 50,
    top_p: float = 0.8,
    speed: float = 1.0,
    enable_text_splitting: bool = True,
):
    """Synthesize ``text`` with the ``'multi'`` model and return WAV bytes.

    Args:
        text: Text to speak.
        speaker_wavs: Optional reference recordings (file paths or file
            objects). When given, they are converted to WAV and passed to
            the model as ``speaker_wav``; ``speaker_idx`` is then not used.
        speaker_idx: Speaker passed to the model as ``speaker`` when no
            reference recordings are supplied.
        language: Language code forwarded to the model.
        temperature, length_penalty, repetition_penalty, top_k, top_p,
        speed, enable_text_splitting: Forwarded unchanged to
            ``tts_to_file``.

    Returns:
        The bytes of the generated WAV file.

    Raises:
        gr.Error: If a reference recording cannot be decoded. Raising
            (instead of returning the message) lets Gradio show it to the
            user rather than treating the text as an audio file path.
    """
    temp_files = []  # every temp file created here; removed in `finally`
    try:
        reference_wavs = []
        for speaker_wav in speaker_wavs or []:
            # Reserve the destination first so it is registered for cleanup
            # even if decoding fails.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
                wav_path = wav_file.name
            temp_files.append(wav_path)
            # Convert the uploaded audio file to WAV format using pydub.
            try:
                audio = AudioSegment.from_file(_speaker_audio_source(speaker_wav))
                audio.export(wav_path, format="wav")
            except Exception as e:
                raise gr.Error(f"Error processing audio file: {e}") from e
            reference_wavs.append(wav_path)

        # tts_to_file documents `file_path` as a path, so synthesize into a
        # temp file and read the result back.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
            output_path = output_file.name
        temp_files.append(output_path)

        # Voice selection: cloned from the references when present,
        # otherwise the named built-in speaker.
        if reference_wavs:
            voice_kwargs = {"speaker_wav": reference_wavs}
        else:
            voice_kwargs = {"speaker": speaker_idx}

        models['multi'].tts_to_file(
            text=text,
            language=language,
            file_path=output_path,
            temperature=temperature,
            length_penalty=length_penalty,
            repetition_penalty=repetition_penalty,
            top_k=top_k,
            top_p=top_p,
            speed=speed,
            enable_text_splitting=enable_text_splitting,
            **voice_kwargs,
        )

        with open(output_path, "rb") as generated:
            return generated.read()
    finally:
        for temp_file in temp_files:
            if os.path.exists(temp_file):
                os.remove(temp_file)
# Build the Gradio interface. The order of `inputs` must match the positional
# parameter order of synthesize_tts.
inputs = [
    gr.Textbox(value="Hello, World!", label="Text to Synthesize"),
    # `file_count="multiple"` is the supported way to accept several uploads;
    # the previous `optional=True, multiple=True` keywords are not File
    # component parameters. Leaving the field empty remains allowed.
    gr.File(
        file_types=["audio"],
        label="Speaker WAV files (optional)",
        file_count="multiple",
    ),
    gr.Textbox(value="Ana Florence", label="Speaker Index"),
    gr.Textbox(value="ja", label="Language"),
    gr.Slider(0, 1, value=0.65, step=0.01, label="Temperature"),
    gr.Slider(0.5, 2, value=1.0, step=0.1, label="Length Penalty"),
    gr.Slider(1, 10, value=2.0, step=0.1, label="Repetition Penalty"),
    gr.Slider(1, 100, value=50, step=1, label="Top-K"),
    gr.Slider(0, 1, value=0.8, step=0.01, label="Top-P"),
    gr.Slider(0.5, 2, value=1.0, step=0.01, label="Speed"),
    gr.Checkbox(value=True, label="Enable Text Splitting"),
]
outputs = gr.Audio(label="Generated Speech")

demo = gr.Interface(
    fn=synthesize_tts,
    inputs=inputs,
    outputs=outputs,
    title="Text-to-Speech Synthesis with Gradio",
)
demo.launch()
|