MH0386 committed on
Commit
b9a47ba
·
verified ·
1 Parent(s): 8daac41

Upload folder using huggingface_hub

Browse files
.deepsource.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version = 1
2
+
3
+ [[analyzers]]
4
+ name = "python"
5
+ dependency_file_paths = ["requirements.txt", "pyproject.toml"]
6
+
7
+ [analyzers.meta]
8
+ runtime_version = "3.x.x"
9
+ type_checker = "mypy"
10
+
11
+ [[analyzers]]
12
+ name = "docker"
13
+
14
+ [[transformers]]
15
+ name = "ruff"
16
+
17
+ [[transformers]]
18
+ name = "isort"
19
+
20
+ [[analyzers]]
21
+ name = "secrets"
22
+
23
+ [[analyzers]]
24
+ name = "terraform"
.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
.flox/
.github/
.vscode/
# README.md is intentionally NOT ignored: pyproject.toml declares it as the
# project readme, so it must be present in the build context when the project
# itself is installed inside the image.
renovate.json
tmp/
# Keep a host-side virtual environment out of the image so `COPY . /app`
# cannot clobber the environment created by `uv sync`.
.venv/
.ruff_cache/
.mypy_cache/
**/__pycache__/
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ tmp/
2
+ .venv/
3
+ .ruff_cache/
4
+ .mypy_cache/
5
+ **/__pycache__/
6
+ .env
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.12
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM ghcr.io/astral-sh/uv:debian-slim

WORKDIR /app

# Create the unprivileged runtime user WITH a home directory (-m) so that
# libraries which cache under $HOME have a writable location.
RUN groupadd nonroot && useradd -m -g nonroot nonroot

# Enable bytecode compilation, Copy from the cache instead of linking since it's a mounted volume
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy

# The application reads these variables at start-up and otherwise binds to
# localhost:8080, which is not reachable from outside the container.
# 7860 is the default port a Hugging Face Docker Space routes traffic to;
# both values can still be overridden at `docker run` time.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860
EXPOSE 7860

# skipcq: DOK-DL3008
RUN apt-get update && \
    apt-get install -qq -y --no-install-recommends espeak-ng && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=.python-version,target=.python-version \
    uv sync --frozen --no-install-project --no-dev

COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev

# The "Save Audio" option writes WAV files under /app/results; the directory
# must be writable by the unprivileged user the container runs as.
RUN mkdir -p /app/results && chown nonroot:nonroot /app/results

# Place executables in the environment at the front of the path
ENV PATH=/app/.venv/bin:$PATH

USER nonroot
# Reset the entrypoint, don't invoke `uv`
ENTRYPOINT []

CMD ["python", "src/vocalizr"]
README.md CHANGED
@@ -1,12 +1,9 @@
1
- ---
2
- title: Vocalizr
3
- emoji: 📊
4
- colorFrom: purple
5
- colorTo: yellow
6
- sdk: gradio
7
- sdk_version: 5.29.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Vocalizr
3
+ emoji: 🔊
4
+ colorFrom: purple
5
+ colorTo: yellow
6
+ sdk: docker
7
+ ---
8
+
9
+ # Vocalizr: Voice Generator part of the Chatacter Backend
 
 
 
pyproject.toml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "vocalizr"
3
+ version = "0.1.0"
4
+ description = "Voice Generator part of the Chatacter Backend"
5
+ readme = "README.md"
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "en-core-web-sm",
9
+ "gradio[mcp]>=5.29.0",
10
+ "kokoro>=0.9.4",
11
+ "soundfile>=0.13.1",
12
+ ]
13
+
14
+ [build-system]
15
+ requires = ["hatchling"]
16
+ build-backend = "hatchling.build"
17
+
18
+ [project.scripts]
19
+ vocalizr = "vocalizr.__main__:main"
20
+
21
+ [dependency-groups]
22
+ dev = [
23
+ "mypy>=1.15.0",
24
+ "pylint>=3.3.7",
25
+ "pyrefly>=0.14.0",
26
+ "ruff>=0.11.8",
27
+ "typos>=1.32.0",
28
+ "black>=25.1.0",
29
+ "pyright>=1.1.400",
30
+ "watchfiles>=1.0.5",
31
+ "huggingface-hub[hf-transfer]>=0.31.1",
32
+ ]
33
+
34
+ [tool.typos.default.extend-words]
35
+ Chatacter = "Chatacter"
36
+
37
+ [tool.pyrefly]
38
+ python_interpreter = ".venv/Scripts/python"
39
+
40
+ [tool.mypy]
41
+ disable = ["E1101"]
42
+ ignore_missing_imports = true
43
+
44
+ [tool.pylint]
45
+ disable = ["E1101", "C0114"]
46
+
47
+ [tool.uv.sources]
48
+ en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
requirements.txt ADDED
@@ -0,0 +1,425 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv export --no-hashes --no-editable --no-dev -o requirements.txt
3
+ .
4
+ addict==2.4.0
5
+ # via misaki
6
+ aiofiles==24.1.0
7
+ # via gradio
8
+ annotated-types==0.7.0
9
+ # via pydantic
10
+ anyio==4.9.0
11
+ # via
12
+ # gradio
13
+ # httpx
14
+ # mcp
15
+ # sse-starlette
16
+ # starlette
17
+ attrs==25.3.0
18
+ # via
19
+ # csvw
20
+ # jsonschema
21
+ # phonemizer-fork
22
+ # referencing
23
+ audioop-lts==0.2.1 ; python_full_version >= '3.13'
24
+ # via gradio
25
+ babel==2.17.0
26
+ # via csvw
27
+ blis==1.3.0
28
+ # via thinc
29
+ catalogue==2.0.10
30
+ # via
31
+ # spacy
32
+ # srsly
33
+ # thinc
34
+ certifi==2025.4.26
35
+ # via
36
+ # httpcore
37
+ # httpx
38
+ # requests
39
+ cffi==1.17.1
40
+ # via soundfile
41
+ charset-normalizer==3.4.2
42
+ # via requests
43
+ click==8.1.8
44
+ # via
45
+ # typer
46
+ # uvicorn
47
+ cloudpathlib==0.21.0
48
+ # via weasel
49
+ colorama==0.4.6
50
+ # via
51
+ # click
52
+ # csvw
53
+ # loguru
54
+ # tqdm
55
+ # wasabi
56
+ confection==0.1.5
57
+ # via
58
+ # thinc
59
+ # weasel
60
+ csvw==3.5.1
61
+ # via segments
62
+ curated-tokenizers==0.0.9
63
+ # via spacy-curated-transformers
64
+ curated-transformers==0.1.1
65
+ # via spacy-curated-transformers
66
+ cymem==2.0.11
67
+ # via
68
+ # preshed
69
+ # spacy
70
+ # thinc
71
+ dlinfo==2.0.0
72
+ # via phonemizer-fork
73
+ docopt==0.6.2
74
+ # via num2words
75
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
76
+ # via voice-generator
77
+ espeakng-loader==0.2.4
78
+ # via misaki
79
+ fastapi==0.115.12
80
+ # via gradio
81
+ ffmpy==0.5.0
82
+ # via gradio
83
+ filelock==3.18.0
84
+ # via
85
+ # huggingface-hub
86
+ # torch
87
+ # transformers
88
+ fsspec==2025.3.2
89
+ # via
90
+ # gradio-client
91
+ # huggingface-hub
92
+ # torch
93
+ gradio==5.29.0
94
+ # via voice-generator
95
+ gradio-client==1.10.0
96
+ # via gradio
97
+ groovy==0.1.2
98
+ # via gradio
99
+ h11==0.16.0
100
+ # via
101
+ # httpcore
102
+ # uvicorn
103
+ httpcore==1.0.9
104
+ # via httpx
105
+ httpx==0.28.1
106
+ # via
107
+ # gradio
108
+ # gradio-client
109
+ # mcp
110
+ # safehttpx
111
+ httpx-sse==0.4.0
112
+ # via mcp
113
+ huggingface-hub==0.30.2
114
+ # via
115
+ # gradio
116
+ # gradio-client
117
+ # kokoro
118
+ # tokenizers
119
+ # transformers
120
+ idna==3.10
121
+ # via
122
+ # anyio
123
+ # httpx
124
+ # requests
125
+ isodate==0.7.2
126
+ # via csvw
127
+ jinja2==3.1.6
128
+ # via
129
+ # gradio
130
+ # spacy
131
+ # torch
132
+ joblib==1.5.0
133
+ # via phonemizer-fork
134
+ jsonschema==4.23.0
135
+ # via csvw
136
+ jsonschema-specifications==2025.4.1
137
+ # via jsonschema
138
+ kokoro==0.9.4
139
+ # via voice-generator
140
+ langcodes==3.5.0
141
+ # via spacy
142
+ language-data==1.3.0
143
+ # via langcodes
144
+ language-tags==1.2.0
145
+ # via csvw
146
+ loguru==0.7.3
147
+ # via kokoro
148
+ marisa-trie==1.2.1
149
+ # via language-data
150
+ markdown-it-py==3.0.0
151
+ # via rich
152
+ markupsafe==3.0.2
153
+ # via
154
+ # gradio
155
+ # jinja2
156
+ mcp==1.7.1
157
+ # via gradio
158
+ mdurl==0.1.2
159
+ # via markdown-it-py
160
+ misaki==0.9.4
161
+ # via kokoro
162
+ mpmath==1.3.0
163
+ # via sympy
164
+ murmurhash==1.0.12
165
+ # via
166
+ # preshed
167
+ # spacy
168
+ # thinc
169
+ networkx==3.4.2
170
+ # via torch
171
+ num2words==0.5.14
172
+ # via misaki
173
+ numpy==2.2.5
174
+ # via
175
+ # blis
176
+ # gradio
177
+ # kokoro
178
+ # pandas
179
+ # soundfile
180
+ # spacy
181
+ # thinc
182
+ # transformers
183
+ nvidia-cublas-cu12==12.6.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
184
+ # via
185
+ # nvidia-cudnn-cu12
186
+ # nvidia-cusolver-cu12
187
+ # torch
188
+ nvidia-cuda-cupti-cu12==12.6.80 ; platform_machine == 'x86_64' and sys_platform == 'linux'
189
+ # via torch
190
+ nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
191
+ # via torch
192
+ nvidia-cuda-runtime-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
193
+ # via torch
194
+ nvidia-cudnn-cu12==9.5.1.17 ; platform_machine == 'x86_64' and sys_platform == 'linux'
195
+ # via torch
196
+ nvidia-cufft-cu12==11.3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
197
+ # via torch
198
+ nvidia-cufile-cu12==1.11.1.6 ; platform_machine == 'x86_64' and sys_platform == 'linux'
199
+ # via torch
200
+ nvidia-curand-cu12==10.3.7.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
201
+ # via torch
202
+ nvidia-cusolver-cu12==11.7.1.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
203
+ # via torch
204
+ nvidia-cusparse-cu12==12.5.4.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
205
+ # via
206
+ # nvidia-cusolver-cu12
207
+ # torch
208
+ nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
209
+ # via torch
210
+ nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
211
+ # via torch
212
+ nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
213
+ # via
214
+ # nvidia-cufft-cu12
215
+ # nvidia-cusolver-cu12
216
+ # nvidia-cusparse-cu12
217
+ # torch
218
+ nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
219
+ # via torch
220
+ orjson==3.10.18
221
+ # via gradio
222
+ packaging==25.0
223
+ # via
224
+ # gradio
225
+ # gradio-client
226
+ # huggingface-hub
227
+ # spacy
228
+ # thinc
229
+ # transformers
230
+ # weasel
231
+ pandas==2.2.3
232
+ # via gradio
233
+ phonemizer-fork==3.3.2
234
+ # via misaki
235
+ pillow==11.2.1
236
+ # via gradio
237
+ preshed==3.0.9
238
+ # via
239
+ # spacy
240
+ # thinc
241
+ pycparser==2.22
242
+ # via cffi
243
+ pydantic==2.11.4
244
+ # via
245
+ # confection
246
+ # fastapi
247
+ # gradio
248
+ # mcp
249
+ # pydantic-settings
250
+ # spacy
251
+ # thinc
252
+ # weasel
253
+ pydantic-core==2.33.2
254
+ # via pydantic
255
+ pydantic-settings==2.9.1
256
+ # via mcp
257
+ pydub==0.25.1
258
+ # via gradio
259
+ pygments==2.19.1
260
+ # via rich
261
+ pyparsing==3.2.3
262
+ # via rdflib
263
+ python-dateutil==2.9.0.post0
264
+ # via
265
+ # csvw
266
+ # pandas
267
+ python-dotenv==1.1.0
268
+ # via pydantic-settings
269
+ python-multipart==0.0.20
270
+ # via
271
+ # gradio
272
+ # mcp
273
+ pytz==2025.2
274
+ # via pandas
275
+ pyyaml==6.0.2
276
+ # via
277
+ # gradio
278
+ # huggingface-hub
279
+ # transformers
280
+ rdflib==7.1.4
281
+ # via csvw
282
+ referencing==0.36.2
283
+ # via
284
+ # jsonschema
285
+ # jsonschema-specifications
286
+ regex==2024.11.6
287
+ # via
288
+ # curated-tokenizers
289
+ # misaki
290
+ # segments
291
+ # transformers
292
+ requests==2.32.3
293
+ # via
294
+ # csvw
295
+ # huggingface-hub
296
+ # spacy
297
+ # transformers
298
+ # weasel
299
+ rfc3986==1.5.0
300
+ # via csvw
301
+ rich==14.0.0
302
+ # via typer
303
+ rpds-py==0.24.0
304
+ # via
305
+ # jsonschema
306
+ # referencing
307
+ ruff==0.11.8 ; sys_platform != 'emscripten'
308
+ # via gradio
309
+ safehttpx==0.1.6
310
+ # via gradio
311
+ safetensors==0.5.3
312
+ # via transformers
313
+ segments==2.3.0
314
+ # via phonemizer-fork
315
+ semantic-version==2.10.0
316
+ # via gradio
317
+ setuptools==80.3.1
318
+ # via
319
+ # marisa-trie
320
+ # spacy
321
+ # thinc
322
+ # torch
323
+ # triton
324
+ shellingham==1.5.4
325
+ # via typer
326
+ six==1.17.0
327
+ # via python-dateutil
328
+ smart-open==7.1.0
329
+ # via weasel
330
+ sniffio==1.3.1
331
+ # via anyio
332
+ soundfile==0.13.1
333
+ # via voice-generator
334
+ spacy==3.8.5
335
+ # via misaki
336
+ spacy-curated-transformers==0.3.0
337
+ # via misaki
338
+ spacy-legacy==3.0.12
339
+ # via spacy
340
+ spacy-loggers==1.0.5
341
+ # via spacy
342
+ srsly==2.5.1
343
+ # via
344
+ # confection
345
+ # spacy
346
+ # thinc
347
+ # weasel
348
+ sse-starlette==2.3.4
349
+ # via mcp
350
+ starlette==0.46.2
351
+ # via
352
+ # fastapi
353
+ # gradio
354
+ # mcp
355
+ # sse-starlette
356
+ sympy==1.14.0
357
+ # via torch
358
+ thinc==8.3.6
359
+ # via spacy
360
+ tokenizers==0.21.1
361
+ # via transformers
362
+ tomlkit==0.13.2
363
+ # via gradio
364
+ torch==2.7.0
365
+ # via
366
+ # curated-transformers
367
+ # kokoro
368
+ # spacy-curated-transformers
369
+ tqdm==4.67.1
370
+ # via
371
+ # huggingface-hub
372
+ # spacy
373
+ # transformers
374
+ transformers==4.51.3
375
+ # via kokoro
376
+ triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
377
+ # via torch
378
+ typer==0.15.3
379
+ # via
380
+ # gradio
381
+ # spacy
382
+ # weasel
383
+ typing-extensions==4.13.2
384
+ # via
385
+ # anyio
386
+ # fastapi
387
+ # gradio
388
+ # gradio-client
389
+ # huggingface-hub
390
+ # phonemizer-fork
391
+ # pydantic
392
+ # pydantic-core
393
+ # referencing
394
+ # torch
395
+ # typer
396
+ # typing-inspection
397
+ typing-inspection==0.4.0
398
+ # via
399
+ # pydantic
400
+ # pydantic-settings
401
+ tzdata==2025.2
402
+ # via pandas
403
+ uritemplate==4.1.1
404
+ # via csvw
405
+ urllib3==2.4.0
406
+ # via
407
+ # gradio
408
+ # requests
409
+ uvicorn==0.34.2 ; sys_platform != 'emscripten'
410
+ # via
411
+ # gradio
412
+ # mcp
413
+ wasabi==1.1.3
414
+ # via
415
+ # spacy
416
+ # thinc
417
+ # weasel
418
+ weasel==0.4.1
419
+ # via spacy
420
+ websockets==15.0.1
421
+ # via gradio-client
422
+ win32-setctime==1.2.0 ; sys_platform == 'win32'
423
+ # via loguru
424
+ wrapt==1.17.2
425
+ # via smart-open
src/vocalizr/__init__.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from os import getenv
2
+ from pathlib import Path
3
+
4
+ from dotenv import load_dotenv
5
+ from kokoro import KPipeline
6
+ from loguru import logger
7
+ from torch import cuda
8
+
9
# Load variables from a .env file (if any) before the environment is read below.
load_dotenv()

# Directory three levels above this file.
BASE_DIR: Path = Path(__file__).parents[2]

# Settings taken from the environment, each with a fallback default.
DEBUG: bool = getenv("DEBUG", "False").lower() == "true"
CHAR_LIMIT: int = int(getenv("CHAR_LIMIT", "5000"))
SERVER_NAME: str = getenv("GRADIO_SERVER_NAME", "localhost")
SERVER_PORT: int = int(getenv("GRADIO_SERVER_PORT", "8080"))

# Shared text-to-speech pipeline, created once at import time.
PIPELINE: KPipeline = KPipeline(lang_code="a")

# Whether torch reports a usable CUDA device.
CUDA_AVAILABLE: bool = cuda.is_available()

logger.info(f"CUDA Available: {CUDA_AVAILABLE}")
20
+
21
# Mapping of the label shown in the voice selector to the voice identifier
# handed to the pipeline. Identifier prefixes: "af"/"am" entries carry the
# US flag, "bf"/"bm" entries the UK flag; 🚺 marks female and 🚹 male voices.
CHOICES: dict[str, str] = {
    "🇺🇸 🚺 Heart ❤️": "af_heart",
    "🇺🇸 🚺 Bella 🔥": "af_bella",
    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
    "🇺🇸 🚺 Aoede": "af_aoede",
    "🇺🇸 🚺 Kore": "af_kore",
    "🇺🇸 🚺 Sarah": "af_sarah",
    "🇺🇸 🚺 Nova": "af_nova",
    "🇺🇸 🚺 Sky": "af_sky",
    "🇺🇸 🚺 Alloy": "af_alloy",
    "🇺🇸 🚺 Jessica": "af_jessica",
    "🇺🇸 🚺 River": "af_river",
    "🇺🇸 🚹 Michael": "am_michael",
    "🇺🇸 🚹 Fenrir": "am_fenrir",
    "🇺🇸 🚹 Puck": "am_puck",
    "🇺🇸 🚹 Echo": "am_echo",
    "🇺🇸 🚹 Eric": "am_eric",
    "🇺🇸 🚹 Liam": "am_liam",
    "🇺🇸 🚹 Onyx": "am_onyx",
    "🇺🇸 🚹 Santa": "am_santa",
    "🇺🇸 🚹 Adam": "am_adam",
    "🇬🇧 🚺 Emma": "bf_emma",
    "🇬🇧 🚺 Isabella": "bf_isabella",
    "🇬🇧 🚺 Alice": "bf_alice",
    "🇬🇧 🚺 Lily": "bf_lily",
    "🇬🇧 🚹 George": "bm_george",
    "🇬🇧 🚹 Fable": "bm_fable",
    "🇬🇧 🚹 Lewis": "bm_lewis",
    "🇬🇧 🚹 Daniel": "bm_daniel",
}
src/vocalizr/__main__.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio import Blocks
2
+
3
+ from vocalizr import DEBUG, SERVER_NAME, SERVER_PORT
4
+ from vocalizr.gui import app_block
5
+
6
+
7
def main() -> None:
    """Launch the Gradio voice generation web application.

    Builds the interface via ``app_block`` and starts it on
    ``SERVER_NAME``/``SERVER_PORT`` with the MCP server, API page,
    monitoring and error display enabled.
    """
    launch_options = {
        "server_name": SERVER_NAME,
        "server_port": SERVER_PORT,
        "debug": DEBUG,
        "mcp_server": True,
        "show_api": True,
        "enable_monitoring": True,
        "show_error": True,
    }
    interface: Blocks = app_block()
    interface.launch(**launch_options)


if __name__ == "__main__":
    main()
src/vocalizr/gui.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio import (
2
+ Audio,
3
+ Blocks,
4
+ Button,
5
+ Checkbox,
6
+ Column,
7
+ Dropdown,
8
+ Row,
9
+ Slider,
10
+ Textbox,
11
+ )
12
+
13
+ from vocalizr import CHAR_LIMIT, CHOICES, CUDA_AVAILABLE
14
+ from vocalizr.model import generate_audio_for_text
15
+
16
+
17
def app_block() -> Blocks:
    """Create and return the main application interface.

    The left column holds the text input, the voice and hardware selectors,
    the "Save Audio" checkbox and the speed slider; the right column holds
    the audio output and the "Generate" button, which calls
    ``generate_audio_for_text`` with the text, voice, speed and save flag.

    :return: Blocks: The complete Gradio application interface
    """
    # Build the help text as a single line so no newlines or source
    # indentation leak into the user-facing string.
    char_limit_label = "∞" if CHAR_LIMIT is None else CHAR_LIMIT
    input_info = (
        "Up to ~500 characters per Generate, "
        f"or {char_limit_label} characters per Stream"
    )

    with Blocks() as app:
        with Row():
            with Column():
                text: Textbox = Textbox(label="Input Text", info=input_info)
                with Row():
                    voice: Dropdown = Dropdown(
                        choices=list(CHOICES.items()),
                        value="af_heart",
                        label="Voice",
                        info="Quality and availability vary by language",
                    )
                    # NOTE(review): this selector is display-only; its value
                    # is not among the inputs passed to the click handler.
                    Dropdown(
                        choices=[("GPU 🚀", True), ("CPU 🐌", False)],
                        value=CUDA_AVAILABLE,
                        label="Hardware",
                        info="GPU is usually faster, but has a usage quota",
                        interactive=CUDA_AVAILABLE,
                    )
                save_file: Checkbox = Checkbox(
                    label="Save Audio", info="Save audio to local storage"
                )
                speed: Slider = Slider(
                    minimum=0.5,
                    maximum=2,
                    value=1,
                    step=0.1,
                    label="Speed",
                )
            with Column():
                out_audio: Audio = Audio(
                    label="Output Audio",
                    interactive=False,
                    streaming=False,
                    autoplay=True,
                )
                generate_btn: Button = Button("Generate", variant="primary")
        # Event listeners have to be registered while the Blocks context is open.
        generate_btn.click(
            fn=generate_audio_for_text,
            inputs=[text, voice, speed, save_file],
            outputs=[out_audio],
        )
    return app
src/vocalizr/model.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from os import makedirs
3
+
4
+ from gradio import Error
5
+ from loguru import logger
6
+ from numpy import ndarray
7
+ from soundfile import write
8
+ from torch import Tensor
9
+
10
+ from vocalizr import BASE_DIR, CHAR_LIMIT, PIPELINE
11
+
12
+
13
def save_file_wav(audio: ndarray) -> None:
    """Save audio data to a WAV file in the project's 'results' directory.

    Creates ``BASE_DIR/results`` if it does not exist yet and writes a
    timestamped WAV file into it at a fixed sample rate of 24,000 Hz.

    :param audio: Data to save.
    :return: None
    :raise OSError: If the directory cannot be created or an OS-level error
        occurs while saving the file.
    """
    results_dir = f"{BASE_DIR}/results"
    filename = f"{results_dir}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.wav"
    try:
        # Create the directory the file is actually written to. Using a bare
        # relative "results" here would depend on the current working
        # directory and could leave BASE_DIR/results missing.
        makedirs(name=results_dir, exist_ok=True)
        logger.info(f"Saving audio to {filename}")
        write(filename, audio, 24000)
    except OSError as e:
        raise OSError(f"Failed to save audio to {filename}: {e}") from e
30
+
31
+
32
def generate_audio_for_text(
    text: str, voice: str = "af_heart", speed: float = 1, save_file: bool = False
) -> tuple[int, ndarray]:
    """Generate audio for the input text.

    The text is stripped and truncated to ``CHAR_LIMIT`` characters (when a
    limit is set) and handed to the shared pipeline. Only the first audio
    chunk the pipeline yields is returned.

    :param text: Input text to convert to speech
    :param voice: Voice identifier
    :param speed: Speech speed multiplier
    :param save_file: If to save the audio file to disk.
    :return: Tuple containing the audio sample rate and raw audio data.
    :raise Error: If an error occurs during generation or while saving.
    :raise RuntimeError: If the pipeline yields no audio at all.
    """
    text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
    try:
        for _, _, audio in PIPELINE(text, voice, speed):
            samples: ndarray = Tensor(audio).numpy()
            if save_file:
                save_file_wav(samples)
            # Deliberately return on the first chunk; later chunks are not generated.
            return 24000, samples
    except Error:
        # Already a Gradio error: let it through untouched.
        raise
    except Exception as e:
        # Surface every other failure as a Gradio error, as the contract
        # above promises, and keep the traceback in the log.
        logger.exception("Audio generation failed")
        raise Error(str(e)) from e
    raise RuntimeError("No audio generated")
uv.lock ADDED
The diff for this file is too large to render. See raw diff