MH0386 committed on
Commit
1fc10f3
·
verified ·
1 Parent(s): 4f9fbb2

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. src/vocalizr/gui.py +1 -1
  2. src/vocalizr/model.py +33 -24
src/vocalizr/gui.py CHANGED
@@ -24,7 +24,7 @@ def app_block() -> Blocks:
24
  with Column():
25
  text: Textbox = Textbox(
26
  label="Input Text",
27
- info=("""Enter your text here"""),
28
  )
29
  with Row():
30
  voice: Dropdown = Dropdown(
 
24
  with Column():
25
  text: Textbox = Textbox(
26
  label="Input Text",
27
+ info="Enter your text here",
28
  )
29
  with Row():
30
  voice: Dropdown = Dropdown(
src/vocalizr/model.py CHANGED
@@ -1,4 +1,5 @@
1
  from typing import Any, Generator, Literal
 
2
  from gradio import Error
3
  from kokoro import KPipeline
4
  from loguru import logger
@@ -6,20 +7,21 @@ from numpy import float32
6
  from numpy.typing import NDArray
7
  from soundfile import write
8
 
9
- from vocalizr import CHAR_LIMIT, PIPELINE, AUDIO_FILE_PATH
10
 
11
 
12
  @logger.catch
13
  def save_file_wav(audio: NDArray[float32]) -> None:
14
- """Save audio data to a WAV file in the 'results' directory.
15
- Creates a timestamped WAV file in the 'results' directory with
16
- the provided audio data at a fixed sample rate of 24,000 Hz.
17
 
18
- Args:
19
- audio (NDArray[float32]): raw audio data.
 
20
 
21
- Raises:
22
- RuntimeError: If there are problems with saving the audio file locally.
23
  """
24
  try:
25
  logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
@@ -32,26 +34,33 @@ def save_file_wav(audio: NDArray[float32]) -> None:
32
  # noinspection PyTypeChecker
33
  @logger.catch
34
  def generate_audio_for_text(
35
- text: str,
36
- voice: str = "af_heart",
37
- speed: float = 1,
38
- save_file: bool = False,
39
  ) -> Generator[tuple[Literal[24000], NDArray[float32]], Any, None]:
40
- """Generate audio for the input text.
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
- Args:
43
- text (str): Input text to convert to speech
44
- voice (str, optional): Voice identifier. Defaults to "af_heart".
45
- speed (float, optional): Speech speed. Defaults to 1.
46
- save_file (bool, optional): If to save the audio file to disk. Defaults to False.
47
 
48
- Raises:
49
- Error: If text (str) is empty
50
- Error: If audio (NDArray[float32]) is str
51
- Error: If audio (NDArray[float32]) is None
52
 
53
- Yields:
54
- Generator[tuple[Literal[24000], NDArray[float32]], Any, None]: Tuple containing the audio sample rate and raw audio data.
 
 
55
  """
56
  try:
57
  text = text if CHAR_LIMIT == -1 else text.strip()[:CHAR_LIMIT]
 
1
  from typing import Any, Generator, Literal
2
+
3
  from gradio import Error
4
  from kokoro import KPipeline
5
  from loguru import logger
 
7
  from numpy.typing import NDArray
8
  from soundfile import write
9
 
10
+ from vocalizr import AUDIO_FILE_PATH, CHAR_LIMIT, PIPELINE
11
 
12
 
13
  @logger.catch
14
  def save_file_wav(audio: NDArray[float32]) -> None:
15
+ """
16
+ Saves an audio array to a WAV file using the specified sampling rate. If the saving
17
+ operation fails, it logs the exception and raises a RuntimeError.
18
 
19
+ :param audio: The audio data to be saved. Must be a NumPy array of data type
20
+ float32, representing the audio signal to be written to the file.
21
+ :type audio: NDArray[float32]
22
 
23
+ :return: This function does not return a value.
24
+ :rtype: None
25
  """
26
  try:
27
  logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
 
34
  # noinspection PyTypeChecker
35
  @logger.catch
36
  def generate_audio_for_text(
37
+ text: str, voice: str = "af_heart", speed: float = 1, save_file: bool = False
 
 
 
38
  ) -> Generator[tuple[Literal[24000], NDArray[float32]], Any, None]:
39
+ """
40
+ Generates audio from the provided text using the specified voice and speed.
41
+ It allows saving the generated audio to a file if required. The function
42
+ yields tuples containing the audio sampling rate and the audio data as a
43
+ NumPy array.
44
+
45
+ :param text: The input text to generate audio for. If CHAR_LIMIT is set to a
46
+ positive value, the text will be truncated to fit that limit.
47
+ :type text: str
48
+
49
+ :param voice: The voice profile to use for audio generation.
50
+ Defaults to "af_heart".
51
+ :type voice: str
52
 
53
+ :param speed: The speed modifier for audio generation. Defaults to 1.0.
54
+ :type speed: float
 
 
 
55
 
56
+ :param save_file: Whether to save the generated audio to a file. Defaults
57
+ to False.
58
+ :type save_file: bool
 
59
 
60
+ :return: A generator that yields tuples, where the first element is the
61
+ fixed sampling rate of 24,000 Hz, and the second element is a NumPy
62
+ array representing the generated audio data.
63
+ :rtype: Generator[tuple[Literal[24000], NDArray[float32]], Any, None]
64
  """
65
  try:
66
  text = text if CHAR_LIMIT == -1 else text.strip()[:CHAR_LIMIT]