Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files
- src/vocalizr/gui.py +1 -1
- src/vocalizr/model.py +33 -24
src/vocalizr/gui.py
CHANGED
@@ -24,7 +24,7 @@ def app_block() -> Blocks:
|
|
24 |
with Column():
|
25 |
text: Textbox = Textbox(
|
26 |
label="Input Text",
|
27 |
-
info=
|
28 |
)
|
29 |
with Row():
|
30 |
voice: Dropdown = Dropdown(
|
|
|
24 |
with Column():
|
25 |
text: Textbox = Textbox(
|
26 |
label="Input Text",
|
27 |
+
info="Enter your text here",
|
28 |
)
|
29 |
with Row():
|
30 |
voice: Dropdown = Dropdown(
|
src/vocalizr/model.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
from typing import Any, Generator, Literal
|
|
|
2 |
from gradio import Error
|
3 |
from kokoro import KPipeline
|
4 |
from loguru import logger
|
@@ -6,20 +7,21 @@ from numpy import float32
|
|
6 |
from numpy.typing import NDArray
|
7 |
from soundfile import write
|
8 |
|
9 |
-
from vocalizr import CHAR_LIMIT, PIPELINE
|
10 |
|
11 |
|
12 |
@logger.catch
|
13 |
def save_file_wav(audio: NDArray[float32]) -> None:
|
14 |
-
"""
|
15 |
-
|
16 |
-
|
17 |
|
18 |
-
|
19 |
-
|
|
|
20 |
|
21 |
-
|
22 |
-
|
23 |
"""
|
24 |
try:
|
25 |
logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
|
@@ -32,26 +34,33 @@ def save_file_wav(audio: NDArray[float32]) -> None:
|
|
32 |
# noinspection PyTypeChecker
|
33 |
@logger.catch
|
34 |
def generate_audio_for_text(
|
35 |
-
text: str,
|
36 |
-
voice: str = "af_heart",
|
37 |
-
speed: float = 1,
|
38 |
-
save_file: bool = False,
|
39 |
) -> Generator[tuple[Literal[24000], NDArray[float32]], Any, None]:
|
40 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
voice (str, optional): Voice identifier. Defaults to "af_heart".
|
45 |
-
speed (float, optional): Speech speed. Defaults to 1.
|
46 |
-
save_file (bool, optional): If to save the audio file to disk. Defaults to False.
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
Error: If audio (NDArray[float32]) is None
|
52 |
|
53 |
-
|
54 |
-
|
|
|
|
|
55 |
"""
|
56 |
try:
|
57 |
text = text if CHAR_LIMIT == -1 else text.strip()[:CHAR_LIMIT]
|
|
|
1 |
from typing import Any, Generator, Literal
|
2 |
+
|
3 |
from gradio import Error
|
4 |
from kokoro import KPipeline
|
5 |
from loguru import logger
|
|
|
7 |
from numpy.typing import NDArray
|
8 |
from soundfile import write
|
9 |
|
10 |
+
from vocalizr import AUDIO_FILE_PATH, CHAR_LIMIT, PIPELINE
|
11 |
|
12 |
|
13 |
@logger.catch
|
14 |
def save_file_wav(audio: NDArray[float32]) -> None:
|
15 |
+
"""
|
16 |
+
Saves an audio array to a WAV file using the specified sampling rate. If the saving
|
17 |
+
operation fails, it logs the exception and raises a RuntimeError.
|
18 |
|
19 |
+
:param audio: The audio data to be saved. Must be a NumPy array of data type
|
20 |
+
float32, representing the audio signal to be written to the file.
|
21 |
+
:type audio: NDArray[float32]
|
22 |
|
23 |
+
:return: This function does not return a value.
|
24 |
+
:rtype: None
|
25 |
"""
|
26 |
try:
|
27 |
logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
|
|
|
34 |
# noinspection PyTypeChecker
|
35 |
@logger.catch
|
36 |
def generate_audio_for_text(
|
37 |
+
text: str, voice: str = "af_heart", speed: float = 1, save_file: bool = False
|
|
|
|
|
|
|
38 |
) -> Generator[tuple[Literal[24000], NDArray[float32]], Any, None]:
|
39 |
+
"""
|
40 |
+
Generates audio from the provided text using the specified voice and speed.
|
41 |
+
It allows saving the generated audio to a file if required. The function
|
42 |
+
yields tuples containing the audio sampling rate and the audio data as a
|
43 |
+
NumPy array.
|
44 |
+
|
45 |
+
:param text: The input text to generate audio for. If CHAR_LIMIT is set to a
|
46 |
+
positive value, the text will be truncated to fit that limit.
|
47 |
+
:type text: str
|
48 |
+
|
49 |
+
:param voice: The voice profile to use for audio generation.
|
50 |
+
Defaults to "af_heart".
|
51 |
+
:type voice: str
|
52 |
|
53 |
+
:param speed: The speed modifier for audio generation. Defaults to 1.0.
|
54 |
+
:type speed: float
|
|
|
|
|
|
|
55 |
|
56 |
+
:param save_file: Whether to save the generated audio to a file. Defaults
|
57 |
+
to False.
|
58 |
+
:type save_file: bool
|
|
|
59 |
|
60 |
+
:return: A generator that yields tuples, where the first element is the
|
61 |
+
fixed sampling rate of 24,000 Hz, and the second element is a NumPy
|
62 |
+
array representing the generated audio data.
|
63 |
+
:rtype: Generator[tuple[Literal[24000], NDArray[float32]], Any, None]
|
64 |
"""
|
65 |
try:
|
66 |
text = text if CHAR_LIMIT == -1 else text.strip()[:CHAR_LIMIT]
|