Spaces:
Running
on
Zero
Running
on
Zero
ttsteam
committed on
Commit
·
21af360
1
Parent(s):
7fbd573
IndicF5
Browse files- .gitignore +174 -0
- app.py +106 -0
- requirements.txt +27 -0
.gitignore
ADDED
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Byte-compiled / optimized / DLL files
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
|
6 |
+
# C extensions
|
7 |
+
*.so
|
8 |
+
|
9 |
+
# Distribution / packaging
|
10 |
+
.Python
|
11 |
+
build/
|
12 |
+
develop-eggs/
|
13 |
+
dist/
|
14 |
+
downloads/
|
15 |
+
eggs/
|
16 |
+
.eggs/
|
17 |
+
lib/
|
18 |
+
lib64/
|
19 |
+
parts/
|
20 |
+
sdist/
|
21 |
+
var/
|
22 |
+
wheels/
|
23 |
+
share/python-wheels/
|
24 |
+
*.egg-info/
|
25 |
+
.installed.cfg
|
26 |
+
*.egg
|
27 |
+
MANIFEST
|
28 |
+
|
29 |
+
# PyInstaller
|
30 |
+
# Usually these files are written by a python script from a template
|
31 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
32 |
+
*.manifest
|
33 |
+
*.spec
|
34 |
+
|
35 |
+
# Installer logs
|
36 |
+
pip-log.txt
|
37 |
+
pip-delete-this-directory.txt
|
38 |
+
|
39 |
+
# Unit test / coverage reports
|
40 |
+
htmlcov/
|
41 |
+
.tox/
|
42 |
+
.nox/
|
43 |
+
.coverage
|
44 |
+
.coverage.*
|
45 |
+
.cache
|
46 |
+
nosetests.xml
|
47 |
+
coverage.xml
|
48 |
+
*.cover
|
49 |
+
*.py,cover
|
50 |
+
.hypothesis/
|
51 |
+
.pytest_cache/
|
52 |
+
cover/
|
53 |
+
|
54 |
+
# Translations
|
55 |
+
*.mo
|
56 |
+
*.pot
|
57 |
+
|
58 |
+
# Django stuff:
|
59 |
+
*.log
|
60 |
+
local_settings.py
|
61 |
+
db.sqlite3
|
62 |
+
db.sqlite3-journal
|
63 |
+
|
64 |
+
# Flask stuff:
|
65 |
+
instance/
|
66 |
+
.webassets-cache
|
67 |
+
|
68 |
+
# Scrapy stuff:
|
69 |
+
.scrapy
|
70 |
+
|
71 |
+
# Sphinx documentation
|
72 |
+
docs/_build/
|
73 |
+
|
74 |
+
# PyBuilder
|
75 |
+
.pybuilder/
|
76 |
+
target/
|
77 |
+
|
78 |
+
# Jupyter Notebook
|
79 |
+
.ipynb_checkpoints
|
80 |
+
|
81 |
+
# IPython
|
82 |
+
profile_default/
|
83 |
+
ipython_config.py
|
84 |
+
|
85 |
+
# pyenv
|
86 |
+
# For a library or package, you might want to ignore these files since the code is
|
87 |
+
# intended to run in multiple environments; otherwise, check them in:
|
88 |
+
# .python-version
|
89 |
+
|
90 |
+
# pipenv
|
91 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
92 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
93 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
94 |
+
# install all needed dependencies.
|
95 |
+
#Pipfile.lock
|
96 |
+
|
97 |
+
# UV
|
98 |
+
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
99 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
100 |
+
# commonly ignored for libraries.
|
101 |
+
#uv.lock
|
102 |
+
|
103 |
+
# poetry
|
104 |
+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
105 |
+
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
106 |
+
# commonly ignored for libraries.
|
107 |
+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
108 |
+
#poetry.lock
|
109 |
+
|
110 |
+
# pdm
|
111 |
+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
112 |
+
#pdm.lock
|
113 |
+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
114 |
+
# in version control.
|
115 |
+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
116 |
+
.pdm.toml
|
117 |
+
.pdm-python
|
118 |
+
.pdm-build/
|
119 |
+
|
120 |
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
121 |
+
__pypackages__/
|
122 |
+
|
123 |
+
# Celery stuff
|
124 |
+
celerybeat-schedule
|
125 |
+
celerybeat.pid
|
126 |
+
|
127 |
+
# SageMath parsed files
|
128 |
+
*.sage.py
|
129 |
+
|
130 |
+
# Environments
|
131 |
+
.env
|
132 |
+
.venv
|
133 |
+
env/
|
134 |
+
venv/
|
135 |
+
ENV/
|
136 |
+
env.bak/
|
137 |
+
venv.bak/
|
138 |
+
|
139 |
+
# Spyder project settings
|
140 |
+
.spyderproject
|
141 |
+
.spyproject
|
142 |
+
|
143 |
+
# Rope project settings
|
144 |
+
.ropeproject
|
145 |
+
|
146 |
+
# mkdocs documentation
|
147 |
+
/site
|
148 |
+
|
149 |
+
# mypy
|
150 |
+
.mypy_cache/
|
151 |
+
.dmypy.json
|
152 |
+
dmypy.json
|
153 |
+
|
154 |
+
# Pyre type checker
|
155 |
+
.pyre/
|
156 |
+
|
157 |
+
# pytype static type analyzer
|
158 |
+
.pytype/
|
159 |
+
|
160 |
+
# Cython debug symbols
|
161 |
+
cython_debug/
|
162 |
+
|
163 |
+
# PyCharm
|
164 |
+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
165 |
+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
166 |
+
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
167 |
+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
168 |
+
#.idea/
|
169 |
+
|
170 |
+
# Ruff stuff:
|
171 |
+
.ruff_cache/
|
172 |
+
|
173 |
+
# PyPI configuration file
|
174 |
+
.pypirc
|
app.py
ADDED
@@ -0,0 +1,106 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import librosa
|
3 |
+
import requests
|
4 |
+
import tempfile
|
5 |
+
import numpy as np
|
6 |
+
import gradio as gr
|
7 |
+
import soundfile as sf
|
8 |
+
from transformers import AutoModel
|
9 |
+
|
10 |
+
# Function to load reference audio from URL
|
11 |
+
def load_audio_from_url(url, timeout=30):
    """Download an audio file and decode it into samples.

    Args:
        url: HTTP(S) address of an audio file that ``soundfile`` can decode.
        timeout: Seconds to wait for the server before giving up. ``requests``
            applies no timeout unless one is passed, so a stalled server
            would otherwise block the caller indefinitely.

    Returns:
        ``(sample_rate, audio_data)`` on success, or ``(None, None)`` when the
        request fails or the server does not answer with HTTP 200.
    """
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # Report network failures the same way as a non-200 reply so callers
        # only have one failure shape to handle.
        return None, None

    if response.status_code != 200:
        return None, None

    # soundfile returns (data, samplerate); callers here expect the reverse.
    audio_data, sample_rate = sf.read(io.BytesIO(response.content))
    return sample_rate, audio_data
|
17 |
+
|
18 |
+
def synthesize_speech(text, ref_audio, ref_text):
    """Generate speech for ``text`` using a reference recording as the prompt.

    Args:
        text: Text to synthesize.
        ref_audio: ``(sample_rate, samples)`` tuple describing the reference
            recording.
        ref_text: Transcript of the reference recording.

    Returns:
        ``(24000, audio)`` where ``audio`` is the array returned by the model;
        int16 output is rescaled to float32 by dividing by 32768.

    Raises:
        gr.Error: If the reference audio or its transcript is missing, or the
            reference audio is not a two-element tuple. Raising (instead of
            returning the message) lets Gradio show it to the user; the
            audio output component cannot display a plain string.
    """
    import os

    # ``ref_text or ""`` keeps a missing transcript from crashing on .strip().
    if ref_audio is None or not (ref_text or "").strip():
        raise gr.Error("Error: Please provide a reference audio and its corresponding text.")

    # Ensure valid reference audio input
    if not (isinstance(ref_audio, tuple) and len(ref_audio) == 2):
        raise gr.Error("Error: Invalid reference audio input.")
    sample_rate, audio_data = ref_audio

    # Reserve a path, then close the handle before soundfile writes to it so
    # the file is not opened twice at once.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_audio:
        ref_audio_path = temp_audio.name

    try:
        # Save reference audio directly without resampling
        sf.write(ref_audio_path, audio_data, samplerate=sample_rate, format='WAV')
        audio = model(text, ref_audio_path=ref_audio_path, ref_text=ref_text)
    finally:
        # delete=False means nothing else cleans this up; remove it even when
        # writing or inference fails so requests do not leak temp files.
        os.unlink(ref_audio_path)

    # Normalize output
    if audio.dtype == np.int16:
        audio = audio.astype(np.float32) / 32768.0

    return 24000, audio
|
40 |
+
|
41 |
+
|
42 |
+
# Load TTS model
# The model is loaded once at import time and shared by every call to
# synthesize_speech through the module-level name `model`.
repo_id = "ai4bharat/IndicF5"
# NOTE(review): trust_remote_code=True permits code shipped in the model
# repository to run locally — confirm the repository is trusted.
model = AutoModel.from_pretrained(repo_id, trust_remote_code=True)
|
45 |
+
|
46 |
+
# Example Data (Multiple Examples)
# Each entry describes one ready-made demo:
#   audio_name - display label for the reference recording
#   audio_url  - where the reference recording is downloaded from
#   ref_text   - transcript of the reference recording
#   synth_text - text to synthesize with that reference
EXAMPLES = [
    {
        "audio_name": "PAN_F (Happy)",
        "audio_url": "https://github.com/AI4Bharat/IndicF5/raw/refs/heads/main/prompts/PAN_F_HAPPY_00001.wav",
        "ref_text": "ਭਹੰਪੀ ਵਿੱਚ ਸਮਾਰਕਾਂ ਦੇ ਭਵਨ ਨਿਰਮਾਣ ਕਲਾ ਦੇ ਵੇਰਵੇ ਗੁੰਝਲਦਾਰ ਅਤੇ ਹੈਰਾਨ ਕਰਨ ਵਾਲੇ ਹਨ, ਜੋ ਮੈਨੂੰ ਖੁਸ਼ ਕਰਦੇ ਹਨ।",
        "synth_text": "मैं बिना किसी चिंता के अपने दोस्तों को अपने ऑटोमोबाइल एक्सपर्ट के पास भेज देता हूँ क्योंकि मैं जानता हूँ कि वह निश्चित रूप से उनकी सभी जरूरतों पर खरा उतरेगा।"
    },
]


# Preload all example audios
# Runs at import time: each entry gains "sample_rate" and "audio_data" keys.
# Both are None when load_audio_from_url reports a failed download.
for example in EXAMPLES:
    sample_rate, audio_data = load_audio_from_url(example["audio_url"])
    example["sample_rate"] = sample_rate
    example["audio_data"] = audio_data
|
62 |
+
|
63 |
+
|
64 |
+
# Define Gradio interface with layout adjustments
# Layout: an introductory Markdown header, then one row with two columns —
# inputs plus the submit button on the left, generated audio on the right —
# followed by the selectable examples.
with gr.Blocks(css="body { font-family: Arial, sans-serif; }") as iface:
    gr.Markdown(
        """
        # **IndicF5: High-Quality Text-to-Speech for Indian Languages**

        [](https://huggingface.co/ai4bharat/IndicF5)

        We release **IndicF5**, a **near-human polyglot** **Text-to-Speech (TTS)** model trained on **1417 hours** of high-quality speech from **[Rasa](https://huggingface.co/datasets/ai4bharat/Rasa), [IndicTTS](https://www.iitm.ac.in/donlab/indictts/database), [LIMMITS](https://sites.google.com/view/limmits24/), and [IndicVoices-R](https://huggingface.co/datasets/ai4bharat/indicvoices_r)**.

        IndicF5 supports **11 Indian languages**:
        **Assamese, Bengali, Gujarati, Hindi, Kannada, Malayalam, Marathi, Odia, Punjabi, Tamil, Telugu.**

        Generate speech using a reference prompt audio and its corresponding text.
        """
    )

    with gr.Row():
        # Left column: everything the user provides.
        with gr.Column():
            text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter the text to convert to speech...", lines=3)
            ref_audio_input = gr.Audio(type="numpy", label="Reference Prompt Audio")
            ref_text_input = gr.Textbox(label="Text in Reference Prompt Audio", placeholder="Enter the transcript of the reference audio...", lines=2)
            submit_btn = gr.Button("🎤 Generate Speech", variant="primary")

        # Right column: the synthesized result.
        with gr.Column():
            output_audio = gr.Audio(label="Generated Speech", type="numpy")

    # Add multiple examples
    # Each row is ordered to match the `inputs` list passed to gr.Examples:
    # text to synthesize, (sample_rate, samples) reference audio, transcript.
    examples = [
        [ex["synth_text"], (ex["sample_rate"], ex["audio_data"]), ex["ref_text"]] for ex in EXAMPLES
    ]

    gr.Examples(
        examples=examples,
        inputs=[text_input, ref_audio_input, ref_text_input],
        label="Choose an example:"
    )

    # Clicking the button runs synthesize_speech on the three inputs and sends
    # its return value to the output audio component.
    submit_btn.click(synthesize_speech, inputs=[text_input, ref_audio_input, ref_text_input], outputs=[output_audio])

# Launch the app
# Only when executed as a script, not when imported.
if __name__ == "__main__":
    iface.launch(share=True)
|
requirements.txt
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
git+https://github.com/ai4bharat/IndicF5.git
|
2 |
+
accelerate>=0.33.0
|
3 |
+
bitsandbytes>0.37.0; platform_machine != 'arm64' and platform_system != 'Darwin'
|
4 |
+
cached_path
|
5 |
+
click
|
6 |
+
datasets
|
7 |
+
ema_pytorch>=0.5.2
|
8 |
+
gradio>=3.45.2
|
9 |
+
hydra-core>=1.3.0
|
10 |
+
jieba
|
11 |
+
librosa
|
12 |
+
matplotlib
|
13 |
+
numpy<=1.26.4
|
14 |
+
pydub
|
15 |
+
pypinyin
|
16 |
+
safetensors
|
17 |
+
soundfile
|
18 |
+
tomli
|
19 |
+
torch>=2.0.0
|
20 |
+
torchaudio>=2.0.0
|
21 |
+
torchdiffeq
|
22 |
+
tqdm>=4.65.0
|
23 |
+
transformers
|
24 |
+
transformers_stream_generator
|
25 |
+
vocos
|
26 |
+
wandb
|
27 |
+
x_transformers>=1.31.14
|