Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- .deepsource.toml +24 -0
- .dockerignore +9 -0
- .gitignore +6 -0
- .python-version +1 -0
- Dockerfile +35 -0
- README.md +9 -12
- pyproject.toml +48 -0
- requirements.txt +425 -0
- src/vocalizr/__init__.py +50 -0
- src/vocalizr/__main__.py +22 -0
- src/vocalizr/gui.py +72 -0
- src/vocalizr/model.py +53 -0
- uv.lock +0 -0
.deepsource.toml
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
version = 1
|
2 |
+
|
3 |
+
[[analyzers]]
|
4 |
+
name = "python"
|
5 |
+
dependency_file_paths = ["requirements.txt", "pyproject.toml"]
|
6 |
+
|
7 |
+
[analyzers.meta]
|
8 |
+
runtime_version = "3.x.x"
|
9 |
+
type_checker = "mypy"
|
10 |
+
|
11 |
+
[[analyzers]]
|
12 |
+
name = "docker"
|
13 |
+
|
14 |
+
[[transformers]]
|
15 |
+
name = "ruff"
|
16 |
+
|
17 |
+
[[transformers]]
|
18 |
+
name = "isort"
|
19 |
+
|
20 |
+
[[analyzers]]
|
21 |
+
name = "secrets"
|
22 |
+
|
23 |
+
[[analyzers]]
|
24 |
+
name = "terraform"
|
.dockerignore
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.flox/
|
2 |
+
.github/
|
3 |
+
.vscode/
|
4 |
+
README.md
|
5 |
+
renovate.json
|
6 |
+
tmp/
|
7 |
+
.ruff_cache/
|
8 |
+
.mypy_cache/
|
9 |
+
**/__pycache__/
|
.gitignore
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
tmp/
|
2 |
+
.venv/
|
3 |
+
.ruff_cache/
|
4 |
+
.mypy_cache/
|
5 |
+
**/__pycache__/
|
6 |
+
.env
|
.python-version
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
3.12
|
Dockerfile
ADDED
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM ghcr.io/astral-sh/uv:debian-slim

WORKDIR /app

# Run as an unprivileged user. Create a real home directory so libraries that
# cache under $HOME (for example model downloads) have a writable location.
RUN groupadd nonroot && useradd --create-home --uid 1000 -g nonroot nonroot

# Compile bytecode at install time, and copy packages out of the uv cache
# instead of linking because the cache is a separate mounted volume.
# GRADIO_SERVER_NAME / GRADIO_SERVER_PORT are read by the application at
# start-up; without them it binds to localhost:8080, which is not reachable
# from outside the container.
ENV UV_COMPILE_BYTECODE=1 \
    UV_LINK_MODE=copy \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# skipcq: DOK-DL3008
RUN apt-get update && \
    apt-get install -qq -y --no-install-recommends espeak-ng && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

# Install the project's dependencies using the lockfile and settings
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=bind,source=uv.lock,target=uv.lock \
    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
    --mount=type=bind,source=.python-version,target=.python-version \
    uv sync --frozen --no-install-project --no-dev

COPY . /app
RUN --mount=type=cache,target=/root/.cache/uv \
    uv sync --frozen --no-dev

# The application saves generated audio under /app/results; give the
# unprivileged user ownership of just that directory (chown-ing all of /app
# would duplicate the whole virtual environment in a new layer).
RUN mkdir -p /app/results && chown nonroot:nonroot /app/results

# Place executables in the environment at the front of the path
ENV PATH=/app/.venv/bin:$PATH

EXPOSE 7860

USER nonroot
# Reset the entrypoint, don't invoke `uv`
ENTRYPOINT []

CMD ["python", "src/vocalizr"]
|
README.md
CHANGED
@@ -1,12 +1,9 @@
|
|
1 |
-
---
|
2 |
-
title: Vocalizr
|
3 |
-
emoji:
|
4 |
-
colorFrom: purple
|
5 |
-
colorTo: yellow
|
6 |
-
sdk:
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
---
|
11 |
-
|
12 |
-
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
1 |
+
---
|
2 |
+
title: Vocalizr
|
3 |
+
emoji: π
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: docker
|
7 |
+
---
|
8 |
+
|
9 |
+
# Vocalizr: Voice Generator part of the Chatacter Backend
|
|
|
|
|
|
pyproject.toml
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[project]
|
2 |
+
name = "vocalizr"
|
3 |
+
version = "0.1.0"
|
4 |
+
description = "Voice Generator part of the Chatacter Backend"
|
5 |
+
readme = "README.md"
|
6 |
+
requires-python = ">=3.12"
|
7 |
+
dependencies = [
|
8 |
+
"en-core-web-sm",
|
9 |
+
"gradio[mcp]>=5.29.0",
|
10 |
+
"kokoro>=0.9.4",
|
11 |
+
"soundfile>=0.13.1",
|
12 |
+
]
|
13 |
+
|
14 |
+
[build-system]
|
15 |
+
requires = ["hatchling"]
|
16 |
+
build-backend = "hatchling.build"
|
17 |
+
|
18 |
+
[project.scripts]
|
19 |
+
vocalizr = "vocalizr.__main__:main"
|
20 |
+
|
21 |
+
[dependency-groups]
|
22 |
+
dev = [
|
23 |
+
"mypy>=1.15.0",
|
24 |
+
"pylint>=3.3.7",
|
25 |
+
"pyrefly>=0.14.0",
|
26 |
+
"ruff>=0.11.8",
|
27 |
+
"typos>=1.32.0",
|
28 |
+
"black>=25.1.0",
|
29 |
+
"pyright>=1.1.400",
|
30 |
+
"watchfiles>=1.0.5",
|
31 |
+
"huggingface-hub[hf-transfer]>=0.31.1",
|
32 |
+
]
|
33 |
+
|
34 |
+
[tool.typos.default.extend-words]
|
35 |
+
Chatacter = "Chatacter"
|
36 |
+
|
37 |
+
[tool.pyrefly]
|
38 |
+
python_interpreter = ".venv/Scripts/python"
|
39 |
+
|
40 |
+
[tool.mypy]
|
41 |
+
disable = ["E1101"]
|
42 |
+
ignore_missing_imports = true
|
43 |
+
|
44 |
+
[tool.pylint]
|
45 |
+
disable = ["E1101", "C0114"]
|
46 |
+
|
47 |
+
[tool.uv.sources]
|
48 |
+
en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }
|
requirements.txt
ADDED
@@ -0,0 +1,425 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# This file was autogenerated by uv via the following command:
|
2 |
+
# uv export --no-hashes --no-editable --no-dev -o requirements.txt
|
3 |
+
.
|
4 |
+
addict==2.4.0
|
5 |
+
# via misaki
|
6 |
+
aiofiles==24.1.0
|
7 |
+
# via gradio
|
8 |
+
annotated-types==0.7.0
|
9 |
+
# via pydantic
|
10 |
+
anyio==4.9.0
|
11 |
+
# via
|
12 |
+
# gradio
|
13 |
+
# httpx
|
14 |
+
# mcp
|
15 |
+
# sse-starlette
|
16 |
+
# starlette
|
17 |
+
attrs==25.3.0
|
18 |
+
# via
|
19 |
+
# csvw
|
20 |
+
# jsonschema
|
21 |
+
# phonemizer-fork
|
22 |
+
# referencing
|
23 |
+
audioop-lts==0.2.1 ; python_full_version >= '3.13'
|
24 |
+
# via gradio
|
25 |
+
babel==2.17.0
|
26 |
+
# via csvw
|
27 |
+
blis==1.3.0
|
28 |
+
# via thinc
|
29 |
+
catalogue==2.0.10
|
30 |
+
# via
|
31 |
+
# spacy
|
32 |
+
# srsly
|
33 |
+
# thinc
|
34 |
+
certifi==2025.4.26
|
35 |
+
# via
|
36 |
+
# httpcore
|
37 |
+
# httpx
|
38 |
+
# requests
|
39 |
+
cffi==1.17.1
|
40 |
+
# via soundfile
|
41 |
+
charset-normalizer==3.4.2
|
42 |
+
# via requests
|
43 |
+
click==8.1.8
|
44 |
+
# via
|
45 |
+
# typer
|
46 |
+
# uvicorn
|
47 |
+
cloudpathlib==0.21.0
|
48 |
+
# via weasel
|
49 |
+
colorama==0.4.6
|
50 |
+
# via
|
51 |
+
# click
|
52 |
+
# csvw
|
53 |
+
# loguru
|
54 |
+
# tqdm
|
55 |
+
# wasabi
|
56 |
+
confection==0.1.5
|
57 |
+
# via
|
58 |
+
# thinc
|
59 |
+
# weasel
|
60 |
+
csvw==3.5.1
|
61 |
+
# via segments
|
62 |
+
curated-tokenizers==0.0.9
|
63 |
+
# via spacy-curated-transformers
|
64 |
+
curated-transformers==0.1.1
|
65 |
+
# via spacy-curated-transformers
|
66 |
+
cymem==2.0.11
|
67 |
+
# via
|
68 |
+
# preshed
|
69 |
+
# spacy
|
70 |
+
# thinc
|
71 |
+
dlinfo==2.0.0
|
72 |
+
# via phonemizer-fork
|
73 |
+
docopt==0.6.2
|
74 |
+
# via num2words
|
75 |
+
en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
|
76 |
+
# via voice-generator
|
77 |
+
espeakng-loader==0.2.4
|
78 |
+
# via misaki
|
79 |
+
fastapi==0.115.12
|
80 |
+
# via gradio
|
81 |
+
ffmpy==0.5.0
|
82 |
+
# via gradio
|
83 |
+
filelock==3.18.0
|
84 |
+
# via
|
85 |
+
# huggingface-hub
|
86 |
+
# torch
|
87 |
+
# transformers
|
88 |
+
fsspec==2025.3.2
|
89 |
+
# via
|
90 |
+
# gradio-client
|
91 |
+
# huggingface-hub
|
92 |
+
# torch
|
93 |
+
gradio==5.29.0
|
94 |
+
# via voice-generator
|
95 |
+
gradio-client==1.10.0
|
96 |
+
# via gradio
|
97 |
+
groovy==0.1.2
|
98 |
+
# via gradio
|
99 |
+
h11==0.16.0
|
100 |
+
# via
|
101 |
+
# httpcore
|
102 |
+
# uvicorn
|
103 |
+
httpcore==1.0.9
|
104 |
+
# via httpx
|
105 |
+
httpx==0.28.1
|
106 |
+
# via
|
107 |
+
# gradio
|
108 |
+
# gradio-client
|
109 |
+
# mcp
|
110 |
+
# safehttpx
|
111 |
+
httpx-sse==0.4.0
|
112 |
+
# via mcp
|
113 |
+
huggingface-hub==0.30.2
|
114 |
+
# via
|
115 |
+
# gradio
|
116 |
+
# gradio-client
|
117 |
+
# kokoro
|
118 |
+
# tokenizers
|
119 |
+
# transformers
|
120 |
+
idna==3.10
|
121 |
+
# via
|
122 |
+
# anyio
|
123 |
+
# httpx
|
124 |
+
# requests
|
125 |
+
isodate==0.7.2
|
126 |
+
# via csvw
|
127 |
+
jinja2==3.1.6
|
128 |
+
# via
|
129 |
+
# gradio
|
130 |
+
# spacy
|
131 |
+
# torch
|
132 |
+
joblib==1.5.0
|
133 |
+
# via phonemizer-fork
|
134 |
+
jsonschema==4.23.0
|
135 |
+
# via csvw
|
136 |
+
jsonschema-specifications==2025.4.1
|
137 |
+
# via jsonschema
|
138 |
+
kokoro==0.9.4
|
139 |
+
# via voice-generator
|
140 |
+
langcodes==3.5.0
|
141 |
+
# via spacy
|
142 |
+
language-data==1.3.0
|
143 |
+
# via langcodes
|
144 |
+
language-tags==1.2.0
|
145 |
+
# via csvw
|
146 |
+
loguru==0.7.3
|
147 |
+
# via kokoro
|
148 |
+
marisa-trie==1.2.1
|
149 |
+
# via language-data
|
150 |
+
markdown-it-py==3.0.0
|
151 |
+
# via rich
|
152 |
+
markupsafe==3.0.2
|
153 |
+
# via
|
154 |
+
# gradio
|
155 |
+
# jinja2
|
156 |
+
mcp==1.7.1
|
157 |
+
# via gradio
|
158 |
+
mdurl==0.1.2
|
159 |
+
# via markdown-it-py
|
160 |
+
misaki==0.9.4
|
161 |
+
# via kokoro
|
162 |
+
mpmath==1.3.0
|
163 |
+
# via sympy
|
164 |
+
murmurhash==1.0.12
|
165 |
+
# via
|
166 |
+
# preshed
|
167 |
+
# spacy
|
168 |
+
# thinc
|
169 |
+
networkx==3.4.2
|
170 |
+
# via torch
|
171 |
+
num2words==0.5.14
|
172 |
+
# via misaki
|
173 |
+
numpy==2.2.5
|
174 |
+
# via
|
175 |
+
# blis
|
176 |
+
# gradio
|
177 |
+
# kokoro
|
178 |
+
# pandas
|
179 |
+
# soundfile
|
180 |
+
# spacy
|
181 |
+
# thinc
|
182 |
+
# transformers
|
183 |
+
nvidia-cublas-cu12==12.6.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
184 |
+
# via
|
185 |
+
# nvidia-cudnn-cu12
|
186 |
+
# nvidia-cusolver-cu12
|
187 |
+
# torch
|
188 |
+
nvidia-cuda-cupti-cu12==12.6.80 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
189 |
+
# via torch
|
190 |
+
nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
191 |
+
# via torch
|
192 |
+
nvidia-cuda-runtime-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
193 |
+
# via torch
|
194 |
+
nvidia-cudnn-cu12==9.5.1.17 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
195 |
+
# via torch
|
196 |
+
nvidia-cufft-cu12==11.3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
197 |
+
# via torch
|
198 |
+
nvidia-cufile-cu12==1.11.1.6 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
199 |
+
# via torch
|
200 |
+
nvidia-curand-cu12==10.3.7.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
201 |
+
# via torch
|
202 |
+
nvidia-cusolver-cu12==11.7.1.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
203 |
+
# via torch
|
204 |
+
nvidia-cusparse-cu12==12.5.4.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
205 |
+
# via
|
206 |
+
# nvidia-cusolver-cu12
|
207 |
+
# torch
|
208 |
+
nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
209 |
+
# via torch
|
210 |
+
nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
211 |
+
# via torch
|
212 |
+
nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
213 |
+
# via
|
214 |
+
# nvidia-cufft-cu12
|
215 |
+
# nvidia-cusolver-cu12
|
216 |
+
# nvidia-cusparse-cu12
|
217 |
+
# torch
|
218 |
+
nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
219 |
+
# via torch
|
220 |
+
orjson==3.10.18
|
221 |
+
# via gradio
|
222 |
+
packaging==25.0
|
223 |
+
# via
|
224 |
+
# gradio
|
225 |
+
# gradio-client
|
226 |
+
# huggingface-hub
|
227 |
+
# spacy
|
228 |
+
# thinc
|
229 |
+
# transformers
|
230 |
+
# weasel
|
231 |
+
pandas==2.2.3
|
232 |
+
# via gradio
|
233 |
+
phonemizer-fork==3.3.2
|
234 |
+
# via misaki
|
235 |
+
pillow==11.2.1
|
236 |
+
# via gradio
|
237 |
+
preshed==3.0.9
|
238 |
+
# via
|
239 |
+
# spacy
|
240 |
+
# thinc
|
241 |
+
pycparser==2.22
|
242 |
+
# via cffi
|
243 |
+
pydantic==2.11.4
|
244 |
+
# via
|
245 |
+
# confection
|
246 |
+
# fastapi
|
247 |
+
# gradio
|
248 |
+
# mcp
|
249 |
+
# pydantic-settings
|
250 |
+
# spacy
|
251 |
+
# thinc
|
252 |
+
# weasel
|
253 |
+
pydantic-core==2.33.2
|
254 |
+
# via pydantic
|
255 |
+
pydantic-settings==2.9.1
|
256 |
+
# via mcp
|
257 |
+
pydub==0.25.1
|
258 |
+
# via gradio
|
259 |
+
pygments==2.19.1
|
260 |
+
# via rich
|
261 |
+
pyparsing==3.2.3
|
262 |
+
# via rdflib
|
263 |
+
python-dateutil==2.9.0.post0
|
264 |
+
# via
|
265 |
+
# csvw
|
266 |
+
# pandas
|
267 |
+
python-dotenv==1.1.0
|
268 |
+
# via pydantic-settings
|
269 |
+
python-multipart==0.0.20
|
270 |
+
# via
|
271 |
+
# gradio
|
272 |
+
# mcp
|
273 |
+
pytz==2025.2
|
274 |
+
# via pandas
|
275 |
+
pyyaml==6.0.2
|
276 |
+
# via
|
277 |
+
# gradio
|
278 |
+
# huggingface-hub
|
279 |
+
# transformers
|
280 |
+
rdflib==7.1.4
|
281 |
+
# via csvw
|
282 |
+
referencing==0.36.2
|
283 |
+
# via
|
284 |
+
# jsonschema
|
285 |
+
# jsonschema-specifications
|
286 |
+
regex==2024.11.6
|
287 |
+
# via
|
288 |
+
# curated-tokenizers
|
289 |
+
# misaki
|
290 |
+
# segments
|
291 |
+
# transformers
|
292 |
+
requests==2.32.3
|
293 |
+
# via
|
294 |
+
# csvw
|
295 |
+
# huggingface-hub
|
296 |
+
# spacy
|
297 |
+
# transformers
|
298 |
+
# weasel
|
299 |
+
rfc3986==1.5.0
|
300 |
+
# via csvw
|
301 |
+
rich==14.0.0
|
302 |
+
# via typer
|
303 |
+
rpds-py==0.24.0
|
304 |
+
# via
|
305 |
+
# jsonschema
|
306 |
+
# referencing
|
307 |
+
ruff==0.11.8 ; sys_platform != 'emscripten'
|
308 |
+
# via gradio
|
309 |
+
safehttpx==0.1.6
|
310 |
+
# via gradio
|
311 |
+
safetensors==0.5.3
|
312 |
+
# via transformers
|
313 |
+
segments==2.3.0
|
314 |
+
# via phonemizer-fork
|
315 |
+
semantic-version==2.10.0
|
316 |
+
# via gradio
|
317 |
+
setuptools==80.3.1
|
318 |
+
# via
|
319 |
+
# marisa-trie
|
320 |
+
# spacy
|
321 |
+
# thinc
|
322 |
+
# torch
|
323 |
+
# triton
|
324 |
+
shellingham==1.5.4
|
325 |
+
# via typer
|
326 |
+
six==1.17.0
|
327 |
+
# via python-dateutil
|
328 |
+
smart-open==7.1.0
|
329 |
+
# via weasel
|
330 |
+
sniffio==1.3.1
|
331 |
+
# via anyio
|
332 |
+
soundfile==0.13.1
|
333 |
+
# via voice-generator
|
334 |
+
spacy==3.8.5
|
335 |
+
# via misaki
|
336 |
+
spacy-curated-transformers==0.3.0
|
337 |
+
# via misaki
|
338 |
+
spacy-legacy==3.0.12
|
339 |
+
# via spacy
|
340 |
+
spacy-loggers==1.0.5
|
341 |
+
# via spacy
|
342 |
+
srsly==2.5.1
|
343 |
+
# via
|
344 |
+
# confection
|
345 |
+
# spacy
|
346 |
+
# thinc
|
347 |
+
# weasel
|
348 |
+
sse-starlette==2.3.4
|
349 |
+
# via mcp
|
350 |
+
starlette==0.46.2
|
351 |
+
# via
|
352 |
+
# fastapi
|
353 |
+
# gradio
|
354 |
+
# mcp
|
355 |
+
# sse-starlette
|
356 |
+
sympy==1.14.0
|
357 |
+
# via torch
|
358 |
+
thinc==8.3.6
|
359 |
+
# via spacy
|
360 |
+
tokenizers==0.21.1
|
361 |
+
# via transformers
|
362 |
+
tomlkit==0.13.2
|
363 |
+
# via gradio
|
364 |
+
torch==2.7.0
|
365 |
+
# via
|
366 |
+
# curated-transformers
|
367 |
+
# kokoro
|
368 |
+
# spacy-curated-transformers
|
369 |
+
tqdm==4.67.1
|
370 |
+
# via
|
371 |
+
# huggingface-hub
|
372 |
+
# spacy
|
373 |
+
# transformers
|
374 |
+
transformers==4.51.3
|
375 |
+
# via kokoro
|
376 |
+
triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
|
377 |
+
# via torch
|
378 |
+
typer==0.15.3
|
379 |
+
# via
|
380 |
+
# gradio
|
381 |
+
# spacy
|
382 |
+
# weasel
|
383 |
+
typing-extensions==4.13.2
|
384 |
+
# via
|
385 |
+
# anyio
|
386 |
+
# fastapi
|
387 |
+
# gradio
|
388 |
+
# gradio-client
|
389 |
+
# huggingface-hub
|
390 |
+
# phonemizer-fork
|
391 |
+
# pydantic
|
392 |
+
# pydantic-core
|
393 |
+
# referencing
|
394 |
+
# torch
|
395 |
+
# typer
|
396 |
+
# typing-inspection
|
397 |
+
typing-inspection==0.4.0
|
398 |
+
# via
|
399 |
+
# pydantic
|
400 |
+
# pydantic-settings
|
401 |
+
tzdata==2025.2
|
402 |
+
# via pandas
|
403 |
+
uritemplate==4.1.1
|
404 |
+
# via csvw
|
405 |
+
urllib3==2.4.0
|
406 |
+
# via
|
407 |
+
# gradio
|
408 |
+
# requests
|
409 |
+
uvicorn==0.34.2 ; sys_platform != 'emscripten'
|
410 |
+
# via
|
411 |
+
# gradio
|
412 |
+
# mcp
|
413 |
+
wasabi==1.1.3
|
414 |
+
# via
|
415 |
+
# spacy
|
416 |
+
# thinc
|
417 |
+
# weasel
|
418 |
+
weasel==0.4.1
|
419 |
+
# via spacy
|
420 |
+
websockets==15.0.1
|
421 |
+
# via gradio-client
|
422 |
+
win32-setctime==1.2.0 ; sys_platform == 'win32'
|
423 |
+
# via loguru
|
424 |
+
wrapt==1.17.2
|
425 |
+
# via smart-open
|
src/vocalizr/__init__.py
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from os import getenv
|
2 |
+
from pathlib import Path
|
3 |
+
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from kokoro import KPipeline
|
6 |
+
from loguru import logger
|
7 |
+
from torch import cuda
|
8 |
+
|
9 |
+
# Load variables from a local .env file (if present) before the settings
# below are read from the environment.
load_dotenv()

# Three directory levels above this file (src/vocalizr/__init__.py).
BASE_DIR: Path = Path(__file__).parent.parent.parent
# Only the literal string "true" (case-insensitive) enables debug mode.
DEBUG: bool = getenv(key="DEBUG", default="False").lower() == "true"
# Maximum number of input characters passed to the speech pipeline.
CHAR_LIMIT: int = int(getenv(key="CHAR_LIMIT", default="5000"))
# Host and port handed to Gradio's launch().
SERVER_NAME: str = getenv(key="GRADIO_SERVER_NAME", default="localhost")
SERVER_PORT: int = int(getenv(key="GRADIO_SERVER_PORT", default="8080"))
# Shared speech pipeline, created once at import time.
PIPELINE: KPipeline = KPipeline(lang_code="a")
CUDA_AVAILABLE: bool = cuda.is_available()

logger.info(f"CUDA Available: {CUDA_AVAILABLE}")

# Display label -> voice identifier, in the order shown in the voice dropdown.
CHOICES: dict[str, str] = {
    "🇺🇸 🚺 Heart ❤️": "af_heart",
    "🇺🇸 🚺 Bella 🔥": "af_bella",
    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
    "🇺🇸 🚺 Aoede": "af_aoede",
    "🇺🇸 🚺 Kore": "af_kore",
    "🇺🇸 🚺 Sarah": "af_sarah",
    "🇺🇸 🚺 Nova": "af_nova",
    "🇺🇸 🚺 Sky": "af_sky",
    "🇺🇸 🚺 Alloy": "af_alloy",
    "🇺🇸 🚺 Jessica": "af_jessica",
    "🇺🇸 🚺 River": "af_river",
    "🇺🇸 🚹 Michael": "am_michael",
    "🇺🇸 🚹 Fenrir": "am_fenrir",
    "🇺🇸 🚹 Puck": "am_puck",
    "🇺🇸 🚹 Echo": "am_echo",
    "🇺🇸 🚹 Eric": "am_eric",
    "🇺🇸 🚹 Liam": "am_liam",
    "🇺🇸 🚹 Onyx": "am_onyx",
    "🇺🇸 🚹 Santa": "am_santa",
    "🇺🇸 🚹 Adam": "am_adam",
    "🇬🇧 🚺 Emma": "bf_emma",
    "🇬🇧 🚺 Isabella": "bf_isabella",
    "🇬🇧 🚺 Alice": "bf_alice",
    "🇬🇧 🚺 Lily": "bf_lily",
    "🇬🇧 🚹 George": "bm_george",
    "🇬🇧 🚹 Fable": "bm_fable",
    "🇬🇧 🚹 Lewis": "bm_lewis",
    "🇬🇧 🚹 Daniel": "bm_daniel",
}
|
src/vocalizr/__main__.py
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gradio import Blocks
|
2 |
+
|
3 |
+
from vocalizr import DEBUG, SERVER_NAME, SERVER_PORT
|
4 |
+
from vocalizr.gui import app_block
|
5 |
+
|
6 |
+
|
7 |
+
def main() -> None:
    """Build the Gradio interface and serve it.

    Host, port and debug flag come from the package-level settings; the MCP
    server, API page, monitoring and error display are always switched on.
    """
    launch_options = {
        "server_name": SERVER_NAME,
        "server_port": SERVER_PORT,
        "debug": DEBUG,
        "mcp_server": True,
        "show_api": True,
        "enable_monitoring": True,
        "show_error": True,
    }
    interface: Blocks = app_block()
    interface.launch(**launch_options)


if __name__ == "__main__":
    main()
|
src/vocalizr/gui.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from gradio import (
|
2 |
+
Audio,
|
3 |
+
Blocks,
|
4 |
+
Button,
|
5 |
+
Checkbox,
|
6 |
+
Column,
|
7 |
+
Dropdown,
|
8 |
+
Row,
|
9 |
+
Slider,
|
10 |
+
Textbox,
|
11 |
+
)
|
12 |
+
|
13 |
+
from vocalizr import CHAR_LIMIT, CHOICES, CUDA_AVAILABLE
|
14 |
+
from vocalizr.model import generate_audio_for_text
|
15 |
+
|
16 |
+
|
17 |
+
def app_block() -> Blocks:
    """Create and return the main application interface.

    The interface has an input column (text, voice, hardware, save option,
    speed) and an output column (audio player and Generate button). Clicking
    Generate calls ``generate_audio_for_text`` with the text, voice, speed
    and save flag and shows the returned audio.

    :return: Blocks: The complete Gradio application interface
    """
    # NOTE(review): the nesting of components inside the rows/columns below
    # was reconstructed; confirm it matches the intended layout.
    char_limit_label = "∞" if CHAR_LIMIT is None else CHAR_LIMIT
    with Blocks() as app:
        with Row():
            with Column():
                text: Textbox = Textbox(
                    label="Input Text",
                    info=(
                        "Up to ~500 characters per Generate, "
                        f"or {char_limit_label} characters per Stream"
                    ),
                )
                with Row():
                    voice: Dropdown = Dropdown(
                        choices=list(CHOICES.items()),
                        value="af_heart",
                        label="Voice",
                        info="Quality and availability vary by language",
                    )
                    # Display only: this control is not among the click
                    # handler's inputs, and it is locked when CUDA is absent.
                    Dropdown(
                        choices=[("GPU 🚀", True), ("CPU 🐌", False)],
                        value=CUDA_AVAILABLE,
                        label="Hardware",
                        info="GPU is usually faster, but has a usage quota",
                        interactive=CUDA_AVAILABLE,
                    )
                    save_file = Checkbox(
                        label="Save Audio", info="Save audio to local storage"
                    )
                speed: Slider = Slider(
                    minimum=0.5,
                    maximum=2,
                    value=1,
                    step=0.1,
                    label="Speed",
                )
            with Column():
                out_audio: Audio = Audio(
                    label="Output Audio",
                    interactive=False,
                    streaming=False,
                    autoplay=True,
                )
                generate_btn: Button = Button("Generate", variant="primary")
        # Event wiring must happen while the Blocks context is still open.
        generate_btn.click(
            fn=generate_audio_for_text,
            inputs=[text, voice, speed, save_file],
            outputs=[out_audio],
        )
    return app
|
src/vocalizr/model.py
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import datetime
|
2 |
+
from os import makedirs
|
3 |
+
|
4 |
+
from gradio import Error
|
5 |
+
from loguru import logger
|
6 |
+
from numpy import ndarray
|
7 |
+
from soundfile import write
|
8 |
+
from torch import Tensor
|
9 |
+
|
10 |
+
from vocalizr import BASE_DIR, CHAR_LIMIT, PIPELINE
|
11 |
+
|
12 |
+
|
13 |
+
def save_file_wav(audio: ndarray) -> None:
    """Save audio data to a timestamped WAV file under ``BASE_DIR/results``.

    The results directory is created on demand in the same location the file
    is written to, so saving works regardless of the current working
    directory. Audio is written at a fixed sample rate of 24,000 Hz.

    :param audio: Data to save.
    :return: None
    :raise OSError: If the directory cannot be created or the file cannot be
        written.
    """
    results_dir = f"{BASE_DIR}/results"
    # Second-resolution timestamp: two saves within the same second target
    # the same file name.
    filename = f"{results_dir}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.wav"
    try:
        makedirs(name=results_dir, exist_ok=True)
        logger.info(f"Saving audio to {filename}")
        write(filename, audio, 24000)
    except OSError as e:
        raise OSError(f"Failed to save audio to {filename}: {e}") from e
|
30 |
+
|
31 |
+
|
32 |
+
def generate_audio_for_text(
    text: str, voice="af_heart", speed=1, save_file: bool = False
) -> tuple[int, ndarray]:
    """Synthesize speech for the given text and return the first audio chunk.

    The text is stripped and cut to ``CHAR_LIMIT`` characters (when a limit is
    set) before it is handed to the shared pipeline. Only the first item the
    pipeline yields is used.

    :param text: Input text to convert to speech
    :param voice: Voice identifier
    :param speed: Speech speed multiplier
    :param save_file: Whether to also save the audio to disk.
    :return: Tuple containing the audio sample rate and raw audio data.
    :raise Error: If a Gradio error occurs during generation.
    :raise RuntimeError: If the pipeline yields no audio at all.
    """
    if CHAR_LIMIT is not None:
        text = text.strip()[:CHAR_LIMIT]
    try:
        # Pull at most one item from the pipeline; None marks "nothing yielded".
        first_chunk = next(iter(PIPELINE(text, voice, speed)), None)
        if first_chunk is not None:
            _, _, raw_audio = first_chunk
            samples = Tensor(raw_audio).numpy()
            if save_file:
                save_file_wav(samples)
            return 24000, samples
    except Error as err:
        raise Error(str(err)) from err
    raise RuntimeError("No audio generated")
|
uv.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|