Spaces: Running on Zero

ttsteam committed · Commit f42b5fd · 1 Parent(s): 4764ee2
IndicF5

app.py CHANGED
@@ -44,6 +44,7 @@ def synthesize_speech(text, ref_audio, ref_text):
 repo_id = "ai4bharat/IndicF5"
 model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+print("Device", device)
 model = model.to(device)
 
 # Example Data (Multiple Examples)
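Note: the hunk header above references the app's synthesize_speech(text, ref_audio, ref_text) callback, whose body is not part of this diff. Below is a minimal sketch of what such a callback might look like, assuming the IndicF5 checkpoint loaded with trust_remote_code=True is called with the target text plus a reference-audio path and its transcript; the keyword names ref_audio_path/ref_text and the 24 kHz output rate are assumptions, not taken from this commit.

import tempfile

import numpy as np
import soundfile as sf

def synthesize_speech(text, ref_audio, ref_text):
    # Hypothetical sketch: ref_audio arrives as (sample_rate, ndarray) because the
    # app's gr.Audio components use type="numpy".
    sample_rate, audio_array = ref_audio

    # Write the reference prompt to a temporary WAV file, assuming the remote-code
    # model takes a file path for the reference audio.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        sf.write(tmp.name, audio_array, sample_rate)
        ref_audio_path = tmp.name

    # Assumed call signature for the IndicF5 remote-code model; "model" is the
    # module-level AutoModel instance loaded in the hunk above.
    generated = model(text, ref_audio_path=ref_audio_path, ref_text=ref_text)

    # Return (sample_rate, waveform) for the numpy-typed output component;
    # 24000 Hz is an assumed output sample rate.
    return 24000, np.asarray(generated, dtype=np.float32)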
@@ -64,46 +65,69 @@ for example in EXAMPLES:
 example["audio_data"] = audio_data
 
 
-# Define Gradio interface with layout adjustments
-with gr.Blocks() as iface:
-    gr.Markdown(
-        """
-        # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
+# # Define Gradio interface with layout adjustments
+# with gr.Blocks() as iface:
+#     gr.Markdown(
+#         """
+#         # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
 
-        [](https://huggingface.co/ai4bharat/IndicF5)
+#         [](https://huggingface.co/ai4bharat/IndicF5)
 
-        We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
+#         We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
 
-        IndicF5 supports **11 Indian languages**:
-        **Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
+#         IndicF5 supports **11 Indian languages**:
+#         **Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
 
-        Generate speech using a reference prompt audio and its corresponding text.
-        """
-    )
+#         Generate speech using a reference prompt audio and its corresponding text.
+#         """
+#     )
 
-    with gr.Row():
-        with gr.Column():
-            text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter the text to convert to speech...", lines=3)
-            ref_audio_input = gr.Audio(type="numpy", label="Reference Prompt Audio")
-            ref_text_input = gr.Textbox(label="Text in Reference Prompt Audio", placeholder="Enter the transcript of the reference audio...", lines=2)
-            submit_btn = gr.Button("🎤 Generate Speech", variant="primary")
+#     with gr.Row():
+#         with gr.Column():
+#             text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter the text to convert to speech...", lines=3)
+#             ref_audio_input = gr.Audio(type="numpy", label="Reference Prompt Audio")
+#             ref_text_input = gr.Textbox(label="Text in Reference Prompt Audio", placeholder="Enter the transcript of the reference audio...", lines=2)
+#             submit_btn = gr.Button("🎤 Generate Speech", variant="primary")
 
-        with gr.Column():
-            output_audio = gr.Audio(label="Generated Speech", type="numpy")
+#         with gr.Column():
+#             output_audio = gr.Audio(label="Generated Speech", type="numpy")
 
-    # Add multiple examples
-    examples = [
-        [ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
-    ]
+#     # Add multiple examples
+#     examples = [
+#         [ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
+#     ]
 
-    gr.Examples(
-        examples=examples,
-        inputs=[text_input, ref_audio_input, ref_text_input],
-        label="Choose an example:"
-    )
+#     gr.Examples(
+#         examples=examples,
+#         inputs=[text_input, ref_audio_input, ref_text_input],
+#         label="Choose an example:"
+#     )
+
+#     submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
+
+## FARZI CODE
+with gr.Blocks() as iface:
+    gr.Markdown(
+        """
+        # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
+        Generate speech using a reference audio and corresponding text.
+        """
+    )
 
-    submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
+    with gr.Row():
+        text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter text here...")
+        ref_text_input = gr.Textbox(label="Reference Text", placeholder="Enter reference text here...")
+
+    with gr.Row():
+        ref_audio_input = gr.Audio(label="Reference Audio", type="numpy")
+        output_audio = gr.Audio(label="Synthesized Audio", type="numpy")
+
+    synthesize_button = gr.Button("Synthesize Speech")
+
+    synthesize_button.click(
+        synthesize_speech,
+        inputs=[text_input, ref_audio_input, ref_text_input],
+        outputs=output_audio
+    )
 
-
-if __name__ == "__main__":
-    iface.queue(50).launch()
+iface.launch()
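Note: the EXAMPLES list consumed above is only partly visible in this diff — the hunk header shows the loop "for example in EXAMPLES:" and the fields synth_text, ref_text, sample_rate and audio_data used by the (now commented-out) gr.Examples block. A rough sketch of how such a list could be assembled, assuming each reference prompt is a local WAV file read with soundfile; the file name and texts below are placeholders, not values from this Space.

import soundfile as sf

# Placeholder entries; the real app defines its own reference prompts and texts.
EXAMPLES = [
    {
        "audio_path": "prompts/example_1.wav",  # hypothetical local file
        "ref_text": "Transcript of the reference prompt audio.",
        "synth_text": "Text to synthesize using the reference voice.",
    },
]

# Mirrors the loop named in the second hunk header: attach raw audio to each entry
# so the UI can pass (sample_rate, audio_data) to the numpy-typed gr.Audio input.
for example in EXAMPLES:
    audio_data, sample_rate = sf.read(example["audio_path"])
    example["sample_rate"] = sample_rate
    example["audio_data"] = audio_data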