ttsteam committed on
Commit
f42b5fd
·
1 Parent(s): 4764ee2
Files changed (1) hide show
  1. app.py +57 -33
app.py CHANGED
@@ -44,6 +44,7 @@ def synthesize_speech(text, ref_audio, ref_text):
44
  repo_id = "ai4bharat/IndicF5"
45
  model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)
46
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
47
  model = model.to(device)
48
 
49
  # Example Data (Multiple Examples)
@@ -64,46 +65,69 @@ for example in EXAMPLES:
64
  example["audio_data"] = audio_data
65
 
66
 
67
- # Define Gradio interface with layout adjustments
68
- with gr.Blocks() as iface:
69
- gr.Markdown(
70
- """
71
- # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
72
 
73
- [![Hugging Face](https://img.shields.io/badge/HuggingFace-Model-orange)](https://huggingface.co/ai4bharat/IndicF5)
74
 
75
- We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
76
 
77
- IndicF5 supports **11 Indian languages**:
78
- **Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
79
 
80
- Generate speech using a reference prompt audio and its corresponding text.
81
- """
82
- )
83
 
84
- # with gr.Row():
85
- # with gr.Column():
86
- # text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter the text to convert to speech...", lines=3)
87
- # ref_audio_input = gr.Audio(type="numpy", label="Reference Prompt Audio")
88
- # ref_text_input = gr.Textbox(label="Text in Reference Prompt Audio", placeholder="Enter the transcript of the reference audio...", lines=2)
89
- # submit_btn = gr.Button("🎤 Generate Speech", variant="primary")
90
 
91
- # with gr.Column():
92
- # output_audio = gr.Audio(label="Generated Speech", type="numpy")
93
 
94
- # # Add multiple examples
95
- # examples = [
96
- # [ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
97
- # ]
98
 
99
- # gr.Examples(
100
- # examples=examples,
101
- # inputs=[text_input, ref_audio_input, ref_text_input],
102
- # label="Choose an example:"
103
- # )
 
 
 
 
 
 
 
 
 
 
 
104
 
105
- # submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
- # Launch the app
108
- if __name__ == "__main__":
109
- iface.queue(50).launch()
 
44
  repo_id = "ai4bharat/IndicF5"
45
  model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)
46
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
47
+ print("Device", device)
48
  model = model.to(device)
49
 
50
  # Example Data (Multiple Examples)
 
65
  example["audio_data"] = audio_data
66
 
67
 
68
+ # # Define Gradio interface with layout adjustments
69
+ # with gr.Blocks() as iface:
70
+ # gr.Markdown(
71
+ # """
72
+ # # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
73
 
74
+ # [![Hugging Face](https://img.shields.io/badge/HuggingFace-Model-orange)](https://huggingface.co/ai4bharat/IndicF5)
75
 
76
+ # We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.
77
 
78
+ # IndicF5 supports **11 Indian languages**:
79
+ # **Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**
80
 
81
+ # Generate speech using a reference prompt audio and its corresponding text.
82
+ # """
83
+ # )
84
 
85
+ # with gr.Row():
86
+ # with gr.Column():
87
+ # text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter the text to convert to speech...", lines=3)
88
+ # ref_audio_input = gr.Audio(type="numpy", label="Reference Prompt Audio")
89
+ # ref_text_input = gr.Textbox(label="Text in Reference Prompt Audio", placeholder="Enter the transcript of the reference audio...", lines=2)
90
+ # submit_btn = gr.Button("🎤 Generate Speech", variant="primary")
91
 
92
+ # with gr.Column():
93
+ # output_audio = gr.Audio(label="Generated Speech", type="numpy")
94
 
95
+ # # Add multiple examples
96
+ # examples = [
97
+ # [ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
98
+ # ]
99
 
100
+ # gr.Examples(
101
+ # examples=examples,
102
+ # inputs=[text_input, ref_audio_input, ref_text_input],
103
+ # label="Choose an example:"
104
+ # )
105
+
106
+ # submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])
107
+
108
+ ## FARZI CODE
109
+ with gr.Blocks() as iface:
110
+ gr.Markdown(
111
+ """
112
+ # **IndicF5: High-Quality Text-to-Speech for Indian Languages**
113
+ Generate speech using a reference audio and corresponding text.
114
+ """
115
+ )
116
 
117
+ with gr.Row():
118
+ text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter text here...")
119
+ ref_text_input = gr.Textbox(label="Reference Text", placeholder="Enter reference text here...")
120
+
121
+ with gr.Row():
122
+ ref_audio_input = gr.Audio(label="Reference Audio", type="numpy")
123
+ output_audio = gr.Audio(label="Synthesized Audio", type="numpy")
124
+
125
+ synthesize_button = gr.Button("Synthesize Speech")
126
+
127
+ synthesize_button.click(
128
+ synthesize_speech,
129
+ inputs=[text_input, ref_audio_input, ref_text_input],
130
+ outputs=output_audio
131
+ )
132
 
133
+ iface.launch()