ttsteam committed on
Commit
ed1cc99
1 Parent(s): f42b5fd
Files changed (1)
  1. app.py +29 -53
app.py CHANGED
@@ -65,69 +65,45 @@ for example in EXAMPLES:
     example["audio_data"] = audio_data
 
 
-# # Define Gradio interface with layout adjustments
-# with gr.Blocks() as iface:
-#     gr.Markdown(
-#         """
-#         # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
+# Define Gradio interface with layout adjustments
+with gr.Blocks() as iface:
+    gr.Markdown(
+        """
+        # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
 
-#         [![Hugging Face](https://img.shields.io/badge/HuggingFace-Model-orange)](https://huggingface.co/ai4bharat/IndicF5)
+        [![Hugging Face](https://img.shields.io/badge/HuggingFace-Model-orange)](https://huggingface.co/ai4bharat/IndicF5)
 
-#         We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
+        We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
 
-#         IndicF5 supports **11 Indian languages**:
-#         **Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
+        IndicF5 supports **11 Indian languages**:
+        **Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
 
-#         Generate speech using a reference prompt audio and its corresponding text.
-#         """
-#     )
+        Generate speech using a reference prompt audio and its corresponding text.
+        """
+    )
 
-#     with gr.Row():
-#         with gr.Column():
-#             text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter the text to convert to speech...", lines=3)
-#             ref_audio_input = gr.Audio(type="numpy", label="Reference Prompt Audio")
-#             ref_text_input = gr.Textbox(label="Text in Reference Prompt Audio", placeholder="Enter the transcript of the reference audio...", lines=2)
-#             submit_btn = gr.Button("🎤 Generate Speech", variant="primary")
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter the text to convert to speech...", lines=3)
+            ref_audio_input = gr.Audio(type="numpy", label="Reference Prompt Audio")
+            ref_text_input = gr.Textbox(label="Text in Reference Prompt Audio", placeholder="Enter the transcript of the reference audio...", lines=2)
+            submit_btn = gr.Button("🎤 Generate Speech", variant="primary")
 
-#         with gr.Column():
-#             output_audio = gr.Audio(label="Generated Speech", type="numpy")
+        with gr.Column():
+            output_audio = gr.Audio(label="Generated Speech", type="numpy")
 
-#     # Add multiple examples
-#     examples = [
-#         [ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
-#     ]
+    # Add multiple examples
+    examples = [
+        [ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
+    ]
 
-#     gr.Examples(
-#         examples=examples,
-#         inputs=[text_input, ref_audio_input, ref_text_input],
-#         label="Choose an example:"
-#     )
-
-#     submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
-
-## FARZI CODE
-with gr.Blocks() as iface:
-    gr.Markdown(
-        """
-        # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
-        Generate speech using a reference audio and corresponding text.
-        """
+    gr.Examples(
+        examples=examples,
+        inputs=[text_input, ref_audio_input, ref_text_input],
+        label="Choose an example:"
     )
 
-    with gr.Row():
-        text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter text here...")
-        ref_text_input = gr.Textbox(label="Reference Text", placeholder="Enter reference text here...")
+    submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
 
-    with gr.Row():
-        ref_audio_input = gr.Audio(label="Reference Audio", type="numpy")
-        output_audio = gr.Audio(label="Synthesized Audio", type="numpy")
-
-    synthesize_button = gr.Button("Synthesize Speech")
-
-    synthesize_button.click(
-        synthesize_speech,
-        inputs=[text_input, ref_audio_input, ref_text_input],
-        outputs=output_audio
-    )
 
 iface.launch()
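
Note: both the removed and the added interface wire their button to a synthesize_speech(text, ref_audio, ref_text) callback and read an EXAMPLES list, both defined earlier in app.py and outside this hunk. Below is a minimal sketch of what that callback has to look like to match the wiring above: the gr.Audio components are declared with type="numpy", so the reference audio arrives as a (sample_rate, ndarray) tuple and the output must be returned the same way. The model-loading and inference calls are assumptions based on the public ai4bharat/IndicF5 model card, not the Space's actual code, and the 24 kHz output rate is likewise assumed.

import tempfile

import numpy as np
import soundfile as sf
from transformers import AutoModel

# Assumption: the Space loads IndicF5 roughly like this (per the model card).
model = AutoModel.from_pretrained("ai4bharat/IndicF5", trust_remote_code=True)

OUTPUT_SAMPLE_RATE = 24_000  # assumed IndicF5 output rate


def synthesize_speech(text, ref_audio, ref_text):
    # ref_audio arrives as (sample_rate, np.ndarray) because the
    # reference gr.Audio input uses type="numpy".
    ref_sr, ref_wav = ref_audio

    # The model card's interface takes a file path for the reference prompt,
    # so write the uploaded array to a temporary WAV first.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        sf.write(tmp.name, ref_wav, ref_sr)
        audio = model(text, ref_audio_path=tmp.name, ref_text=ref_text)

    # Return (sample_rate, ndarray), which is what the type="numpy"
    # output gr.Audio expects.
    return OUTPUT_SAMPLE_RATE, np.asarray(audio)

The temporary-file step is only needed because the Gradio component hands back a raw array while the model card's API expects a path; if the Space's real synthesize_speech accepts arrays directly, that step disappears.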