Spaces:

AlphaSphereDotAI
/

Vocalizr

Sleeping

App Files Files Community

MH0386 committed 16 days ago

Commit

b9a47ba

verified ·

1 Parent(s): 8daac41

Upload folder using huggingface_hub

Browse files

Files changed (13) hide show

.deepsource.toml +24 -0
.dockerignore +9 -0
.gitignore +6 -0
.python-version +1 -0
Dockerfile +35 -0
README.md +9 -12
pyproject.toml +48 -0
requirements.txt +425 -0
src/vocalizr/__init__.py +50 -0
src/vocalizr/__main__.py +22 -0
src/vocalizr/gui.py +72 -0
src/vocalizr/model.py +53 -0
uv.lock +0 -0

.deepsource.toml ADDED Viewed

	@@ -0,0 +1,24 @@

+# DeepSource configuration: the analyzers and code transformers enabled for this repository.
+version = 1
+# Python analysis; dependencies are read from the two files listed below.
+[[analyzers]]
+name = "python"
+dependency_file_paths = ["requirements.txt", "pyproject.toml"]
+  [analyzers.meta]
+  runtime_version = "3.x.x"
+  type_checker = "mypy"
+# Dockerfile analysis.
+[[analyzers]]
+name = "docker"
+# Transformers applied to the code: ruff and isort.
+[[transformers]]
+name = "ruff"
+[[transformers]]
+name = "isort"
+# Scan for committed secrets.
+[[analyzers]]
+name = "secrets"
+# NOTE(review): no Terraform files are part of this commit - confirm this analyzer is needed.
+[[analyzers]]
+name = "terraform"

.dockerignore ADDED Viewed

	@@ -0,0 +1,9 @@

+# Files kept out of the Docker build context (and therefore out of `COPY . /app`).
+# README.md is deliberately NOT listed: pyproject.toml declares it as the project
+# readme, so it must be present when the project itself is installed in the image.
+.flox/
+.github/
+.vscode/
+renovate.json
+tmp/
+# A local virtual environment must not overwrite the one created inside the image.
+.venv/
+# Local environment files may hold secrets and must not be baked into the image.
+.env
+.ruff_cache/
+.mypy_cache/
+**/__pycache__/

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+# Scratch output, the local virtual environment and tool caches.
+tmp/
+.venv/
+.ruff_cache/
+.mypy_cache/
+**/__pycache__/
+# Local environment file (read by load_dotenv() in src/vocalizr/__init__.py).
+.env

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.12

Dockerfile ADDED Viewed

	@@ -0,0 +1,35 @@

+FROM ghcr.io/astral-sh/uv:debian-slim
+WORKDIR /app
+# Create the unprivileged runtime user together with a home directory (-m),
+# so code that caches under $HOME has somewhere writable to put its files.
+RUN groupadd nonroot && useradd -m -g nonroot nonroot
+# Enable bytecode compilation, Copy from the cache instead of linking since it's a mounted volume.
+# Keep any uv-managed Python outside /root so the non-root user can execute it.
+ENV UV_COMPILE_BYTECODE=1 \
+    UV_LINK_MODE=copy \
+    UV_PYTHON_INSTALL_DIR=/opt/uv/python
+# The application defaults to localhost:8080, which cannot be reached from outside
+# the container; listen on all interfaces on the default Docker Space port instead.
+# Both values can still be overridden at run time.
+ENV GRADIO_SERVER_NAME=0.0.0.0 \
+    GRADIO_SERVER_PORT=7860
+# skipcq: DOK-DL3008
+RUN apt-get update && \
+    apt-get install -qq -y --no-install-recommends espeak-ng && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Install the project's dependencies using the lockfile and settings
+RUN --mount=type=cache,target=/root/.cache/uv \
+    --mount=type=bind,source=uv.lock,target=uv.lock \
+    --mount=type=bind,source=pyproject.toml,target=pyproject.toml \
+    --mount=type=bind,source=.python-version,target=.python-version \
+    uv sync --frozen --no-install-project --no-dev
+COPY . /app
+# Install the project itself on top of the already cached dependency layer
+RUN --mount=type=cache,target=/root/.cache/uv \
+    uv sync --frozen --no-dev
+# The "Save Audio" option writes WAV files to /app/results; make it writable for the runtime user
+RUN mkdir -p /app/results && chown nonroot:nonroot /app/results
+# Place executables in the environment at the front of the path
+ENV PATH=/app/.venv/bin:$PATH
+USER nonroot
+EXPOSE 7860
+# Reset the entrypoint, don't invoke `uv`
+ENTRYPOINT []
+CMD ["python", "src/vocalizr"]

README.md CHANGED Viewed

@@ -1,12 +1,9 @@
----
-title: Vocalizr
-emoji: 📊
-colorFrom: purple
-colorTo: yellow
-sdk: gradio
-sdk_version: 5.29.0
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Vocalizr
+emoji: 🔊
+colorFrom: purple
+colorTo: yellow
+sdk: docker
+---
+# Vocalizr: Voice Generator part of the Chatacter Backend

pyproject.toml ADDED Viewed

	@@ -0,0 +1,48 @@

+[project]
+name = "vocalizr"
+version = "0.1.0"
+description = "Voice Generator part of the Chatacter Backend"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "en-core-web-sm",
+    "gradio[mcp]>=5.29.0",
+    "kokoro>=0.9.4",
+    "soundfile>=0.13.1",
+]
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project.scripts]
+vocalizr = "vocalizr.__main__:main"
+[dependency-groups]
+dev = [
+    "mypy>=1.15.0",
+    "pylint>=3.3.7",
+    "pyrefly>=0.14.0",
+    "ruff>=0.11.8",
+    "typos>=1.32.0",
+    "black>=25.1.0",
+    "pyright>=1.1.400",
+    "watchfiles>=1.0.5",
+    "huggingface-hub[hf-transfer]>=0.31.1",
+]
+[tool.typos.default.extend-words]
+Chatacter = "Chatacter"
+[tool.pyrefly]
+python_interpreter = ".venv/Scripts/python"
+[tool.mypy]
+# "disable" and the E1101 message id belong to pylint (see [tool.pylint]); mypy has no such option.
+ignore_missing_imports = true
+[tool.pylint]
+disable = ["E1101", "C0114"]
+[tool.uv.sources]
+en-core-web-sm = { url = "https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl" }

requirements.txt ADDED Viewed

	@@ -0,0 +1,425 @@

+# This file was autogenerated by uv via the following command:
+#    uv export --no-hashes --no-editable --no-dev -o requirements.txt
+.
+addict==2.4.0
+    # via misaki
+aiofiles==24.1.0
+    # via gradio
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.9.0
+    # via
+    #   gradio
+    #   httpx
+    #   mcp
+    #   sse-starlette
+    #   starlette
+attrs==25.3.0
+    # via
+    #   csvw
+    #   jsonschema
+    #   phonemizer-fork
+    #   referencing
+audioop-lts==0.2.1 ; python_full_version >= '3.13'
+    # via gradio
+babel==2.17.0
+    # via csvw
+blis==1.3.0
+    # via thinc
+catalogue==2.0.10
+    # via
+    #   spacy
+    #   srsly
+    #   thinc
+certifi==2025.4.26
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+cffi==1.17.1
+    # via soundfile
+charset-normalizer==3.4.2
+    # via requests
+click==8.1.8
+    # via
+    #   typer
+    #   uvicorn
+cloudpathlib==0.21.0
+    # via weasel
+colorama==0.4.6
+    # via
+    #   click
+    #   csvw
+    #   loguru
+    #   tqdm
+    #   wasabi
+confection==0.1.5
+    # via
+    #   thinc
+    #   weasel
+csvw==3.5.1
+    # via segments
+curated-tokenizers==0.0.9
+    # via spacy-curated-transformers
+curated-transformers==0.1.1
+    # via spacy-curated-transformers
+cymem==2.0.11
+    # via
+    #   preshed
+    #   spacy
+    #   thinc
+dlinfo==2.0.0
+    # via phonemizer-fork
+docopt==0.6.2
+    # via num2words
+en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl
+    # via voice-generator
+espeakng-loader==0.2.4
+    # via misaki
+fastapi==0.115.12
+    # via gradio
+ffmpy==0.5.0
+    # via gradio
+filelock==3.18.0
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+fsspec==2025.3.2
+    # via
+    #   gradio-client
+    #   huggingface-hub
+    #   torch
+gradio==5.29.0
+    # via voice-generator
+gradio-client==1.10.0
+    # via gradio
+groovy==0.1.2
+    # via gradio
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+httpcore==1.0.9
+    # via httpx
+httpx==0.28.1
+    # via
+    #   gradio
+    #   gradio-client
+    #   mcp
+    #   safehttpx
+httpx-sse==0.4.0
+    # via mcp
+huggingface-hub==0.30.2
+    # via
+    #   gradio
+    #   gradio-client
+    #   kokoro
+    #   tokenizers
+    #   transformers
+idna==3.10
+    # via
+    #   anyio
+    #   httpx
+    #   requests
+isodate==0.7.2
+    # via csvw
+jinja2==3.1.6
+    # via
+    #   gradio
+    #   spacy
+    #   torch
+joblib==1.5.0
+    # via phonemizer-fork
+jsonschema==4.23.0
+    # via csvw
+jsonschema-specifications==2025.4.1
+    # via jsonschema
+kokoro==0.9.4
+    # via voice-generator
+langcodes==3.5.0
+    # via spacy
+language-data==1.3.0
+    # via langcodes
+language-tags==1.2.0
+    # via csvw
+loguru==0.7.3
+    # via kokoro
+marisa-trie==1.2.1
+    # via language-data
+markdown-it-py==3.0.0
+    # via rich
+markupsafe==3.0.2
+    # via
+    #   gradio
+    #   jinja2
+mcp==1.7.1
+    # via gradio
+mdurl==0.1.2
+    # via markdown-it-py
+misaki==0.9.4
+    # via kokoro
+mpmath==1.3.0
+    # via sympy
+murmurhash==1.0.12
+    # via
+    #   preshed
+    #   spacy
+    #   thinc
+networkx==3.4.2
+    # via torch
+num2words==0.5.14
+    # via misaki
+numpy==2.2.5
+    # via
+    #   blis
+    #   gradio
+    #   kokoro
+    #   pandas
+    #   soundfile
+    #   spacy
+    #   thinc
+    #   transformers
+nvidia-cublas-cu12==12.6.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.6.80 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-runtime-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cudnn-cu12==9.5.1.17 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufft-cu12==11.3.0.4 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufile-cu12==1.11.1.6 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-curand-cu12==10.3.7.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusolver-cu12==11.7.1.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusparse-cu12==12.5.4.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.6.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu12==2.26.2 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink-cu12==12.6.85 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvtx-cu12==12.6.77 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+orjson==3.10.18
+    # via gradio
+packaging==25.0
+    # via
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   spacy
+    #   thinc
+    #   transformers
+    #   weasel
+pandas==2.2.3
+    # via gradio
+phonemizer-fork==3.3.2
+    # via misaki
+pillow==11.2.1
+    # via gradio
+preshed==3.0.9
+    # via
+    #   spacy
+    #   thinc
+pycparser==2.22
+    # via cffi
+pydantic==2.11.4
+    # via
+    #   confection
+    #   fastapi
+    #   gradio
+    #   mcp
+    #   pydantic-settings
+    #   spacy
+    #   thinc
+    #   weasel
+pydantic-core==2.33.2
+    # via pydantic
+pydantic-settings==2.9.1
+    # via mcp
+pydub==0.25.1
+    # via gradio
+pygments==2.19.1
+    # via rich
+pyparsing==3.2.3
+    # via rdflib
+python-dateutil==2.9.0.post0
+    # via
+    #   csvw
+    #   pandas
+python-dotenv==1.1.0
+    # via pydantic-settings
+python-multipart==0.0.20
+    # via
+    #   gradio
+    #   mcp
+pytz==2025.2
+    # via pandas
+pyyaml==6.0.2
+    # via
+    #   gradio
+    #   huggingface-hub
+    #   transformers
+rdflib==7.1.4
+    # via csvw
+referencing==0.36.2
+    # via
+    #   jsonschema
+    #   jsonschema-specifications
+regex==2024.11.6
+    # via
+    #   curated-tokenizers
+    #   misaki
+    #   segments
+    #   transformers
+requests==2.32.3
+    # via
+    #   csvw
+    #   huggingface-hub
+    #   spacy
+    #   transformers
+    #   weasel
+rfc3986==1.5.0
+    # via csvw
+rich==14.0.0
+    # via typer
+rpds-py==0.24.0
+    # via
+    #   jsonschema
+    #   referencing
+ruff==0.11.8 ; sys_platform != 'emscripten'
+    # via gradio
+safehttpx==0.1.6
+    # via gradio
+safetensors==0.5.3
+    # via transformers
+segments==2.3.0
+    # via phonemizer-fork
+semantic-version==2.10.0
+    # via gradio
+setuptools==80.3.1
+    # via
+    #   marisa-trie
+    #   spacy
+    #   thinc
+    #   torch
+    #   triton
+shellingham==1.5.4
+    # via typer
+six==1.17.0
+    # via python-dateutil
+smart-open==7.1.0
+    # via weasel
+sniffio==1.3.1
+    # via anyio
+soundfile==0.13.1
+    # via voice-generator
+spacy==3.8.5
+    # via misaki
+spacy-curated-transformers==0.3.0
+    # via misaki
+spacy-legacy==3.0.12
+    # via spacy
+spacy-loggers==1.0.5
+    # via spacy
+srsly==2.5.1
+    # via
+    #   confection
+    #   spacy
+    #   thinc
+    #   weasel
+sse-starlette==2.3.4
+    # via mcp
+starlette==0.46.2
+    # via
+    #   fastapi
+    #   gradio
+    #   mcp
+    #   sse-starlette
+sympy==1.14.0
+    # via torch
+thinc==8.3.6
+    # via spacy
+tokenizers==0.21.1
+    # via transformers
+tomlkit==0.13.2
+    # via gradio
+torch==2.7.0
+    # via
+    #   curated-transformers
+    #   kokoro
+    #   spacy-curated-transformers
+tqdm==4.67.1
+    # via
+    #   huggingface-hub
+    #   spacy
+    #   transformers
+transformers==4.51.3
+    # via kokoro
+triton==3.3.0 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+typer==0.15.3
+    # via
+    #   gradio
+    #   spacy
+    #   weasel
+typing-extensions==4.13.2
+    # via
+    #   anyio
+    #   fastapi
+    #   gradio
+    #   gradio-client
+    #   huggingface-hub
+    #   phonemizer-fork
+    #   pydantic
+    #   pydantic-core
+    #   referencing
+    #   torch
+    #   typer
+    #   typing-inspection
+typing-inspection==0.4.0
+    # via
+    #   pydantic
+    #   pydantic-settings
+tzdata==2025.2
+    # via pandas
+uritemplate==4.1.1
+    # via csvw
+urllib3==2.4.0
+    # via
+    #   gradio
+    #   requests
+uvicorn==0.34.2 ; sys_platform != 'emscripten'
+    # via
+    #   gradio
+    #   mcp
+wasabi==1.1.3
+    # via
+    #   spacy
+    #   thinc
+    #   weasel
+weasel==0.4.1
+    # via spacy
+websockets==15.0.1
+    # via gradio-client
+win32-setctime==1.2.0 ; sys_platform == 'win32'
+    # via loguru
+wrapt==1.17.2
+    # via smart-open

src/vocalizr/__init__.py ADDED Viewed

	@@ -0,0 +1,50 @@

+from os import getenv
+from pathlib import Path
+from dotenv import load_dotenv
+from kokoro import KPipeline
+from loguru import logger
+from torch import cuda
+# Package-wide settings, resolved once at import time.
+# Read a local .env file (if present) before any of the getenv() calls below.
+load_dotenv()
+# Three directory levels above this file.
+BASE_DIR: Path = Path(__file__).parent.parent.parent
+# True only when the DEBUG variable equals "true", compared case-insensitively.
+DEBUG: bool = getenv(key="DEBUG", default="False").lower() == "true"
+# Maximum number of input characters; always an int here (never None).
+CHAR_LIMIT: int = int(getenv(key="CHAR_LIMIT", default="5000"))
+# Address and port the Gradio server binds to.
+SERVER_NAME: str = getenv(key="GRADIO_SERVER_NAME", default="localhost")
+SERVER_PORT: int = int(getenv(key="GRADIO_SERVER_PORT", default="8080"))
+# Single shared pipeline, built when the package is first imported.
+# NOTE(review): lang_code "a" is presumably American English - confirm it also suits the bf_/bm_ voices.
+PIPELINE: KPipeline = KPipeline(lang_code="a")
+CUDA_AVAILABLE: bool = cuda.is_available()
+logger.info(f"CUDA Available: {CUDA_AVAILABLE}")
+# Voice catalogue: the label shown in the voice dropdown mapped to the voice
+# identifier that is passed on to the pipeline.
+CHOICES: dict[str, str] = {
+    "🇺🇸 🚺 Heart ❤️": "af_heart",
+    "🇺🇸 🚺 Bella 🔥": "af_bella",
+    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
+    "🇺🇸 🚺 Aoede": "af_aoede",
+    "🇺🇸 🚺 Kore": "af_kore",
+    "🇺🇸 🚺 Sarah": "af_sarah",
+    "🇺🇸 🚺 Nova": "af_nova",
+    "🇺🇸 🚺 Sky": "af_sky",
+    "🇺🇸 🚺 Alloy": "af_alloy",
+    "🇺🇸 🚺 Jessica": "af_jessica",
+    "🇺🇸 🚺 River": "af_river",
+    "🇺🇸 🚹 Michael": "am_michael",
+    "🇺🇸 🚹 Fenrir": "am_fenrir",
+    "🇺🇸 🚹 Puck": "am_puck",
+    "🇺🇸 🚹 Echo": "am_echo",
+    "🇺🇸 🚹 Eric": "am_eric",
+    "🇺🇸 🚹 Liam": "am_liam",
+    "🇺🇸 🚹 Onyx": "am_onyx",
+    "🇺🇸 🚹 Santa": "am_santa",
+    "🇺🇸 🚹 Adam": "am_adam",
+    "🇬🇧 🚺 Emma": "bf_emma",
+    "🇬🇧 🚺 Isabella": "bf_isabella",
+    "🇬🇧 🚺 Alice": "bf_alice",
+    "🇬🇧 🚺 Lily": "bf_lily",
+    "🇬🇧 🚹 George": "bm_george",
+    "🇬🇧 🚹 Fable": "bm_fable",
+    "🇬🇧 🚹 Lewis": "bm_lewis",
+    "🇬🇧 🚹 Daniel": "bm_daniel",
+}

src/vocalizr/__main__.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from gradio import Blocks
+from vocalizr import DEBUG, SERVER_NAME, SERVER_PORT
+from vocalizr.gui import app_block
+def main() -> None:
+    """Build the Gradio interface and start serving it.
+    Host, port and the debug flag are taken from the package settings; the
+    MCP server, the API page, monitoring and error display are always enabled.
+    """
+    interface: Blocks = app_block()
+    launch_options = {
+        "server_name": SERVER_NAME,
+        "server_port": SERVER_PORT,
+        "debug": DEBUG,
+        "mcp_server": True,
+        "show_api": True,
+        "enable_monitoring": True,
+        "show_error": True,
+    }
+    interface.launch(**launch_options)
+if __name__ == "__main__":
+    main()

src/vocalizr/gui.py ADDED Viewed

	@@ -0,0 +1,72 @@

+from gradio import (
+    Audio,
+    Blocks,
+    Button,
+    Checkbox,
+    Column,
+    Dropdown,
+    Row,
+    Slider,
+    Textbox,
+)
+from vocalizr import CHAR_LIMIT, CHOICES, CUDA_AVAILABLE
+from vocalizr.model import generate_audio_for_text
+def app_block() -> Blocks:
+    """Create and return the main application interface.
+    The layout is one row with two columns: the left column holds the text
+    input, the voice / hardware / save controls and the speed slider; the
+    right column holds the audio output and the Generate button. Clicking the
+    button calls generate_audio_for_text with the text, voice, speed and save
+    flag and shows the result in the audio output.
+    :return: Blocks: The complete Gradio application interface
+    """
+    with Blocks() as app:
+        with Row():
+            # Left column: everything the user provides.
+            with Column():
+                text: Textbox = Textbox(
+                    label="Input Text",
+                    info=(
+                        # CHAR_LIMIT is always an int in this package, so the
+                        # "∞" alternative below is never selected.
+                        f"""
+                         Up to ~500 characters per Generate,
+                         or {"∞" if CHAR_LIMIT is None else CHAR_LIMIT}
+                         characters per Stream
+                        """
+                    ),
+                )
+                with Row():
+                    # Choices are (label, voice id) pairs; the default is the id "af_heart".
+                    voice: Dropdown = Dropdown(
+                        choices=list(CHOICES.items()),
+                        value="af_heart",
+                        label="Voice",
+                        info="Quality and availability vary by language",
+                    )
+                    # Display only: this dropdown is not bound to a name and is not
+                    # among the click handler's inputs, so its value never reaches
+                    # generate_audio_for_text. It is editable only when CUDA is available.
+                    Dropdown(
+                        choices=[("GPU 🚀", True), ("CPU 🐌", False)],
+                        value=CUDA_AVAILABLE,
+                        label="Hardware",
+                        info="GPU is usually faster, but has a usage quota",
+                        interactive=CUDA_AVAILABLE,
+                    )
+                    save_file = Checkbox(
+                        label="Save Audio", info="Save audio to local storage"
+                    )
+                speed: Slider = Slider(
+                    minimum=0.5,
+                    maximum=2,
+                    value=1,
+                    step=0.1,
+                    label="Speed",
+                )
+            # Right column: the generated audio and the trigger button.
+            with Column():
+                out_audio: Audio = Audio(
+                    label="Output Audio",
+                    interactive=False,
+                    streaming=False,
+                    autoplay=True,
+                )
+                generate_btn: Button = Button("Generate", variant="primary")
+        # The input order matches the parameter order of generate_audio_for_text.
+        generate_btn.click(
+            fn=generate_audio_for_text,
+            inputs=[text, voice, speed, save_file],
+            outputs=[out_audio],
+        )
+    return app

src/vocalizr/model.py ADDED Viewed

	@@ -0,0 +1,53 @@

+from datetime import datetime
+from os import makedirs
+from gradio import Error
+from loguru import logger
+from numpy import ndarray
+from soundfile import write
+from torch import Tensor
+from vocalizr import BASE_DIR, CHAR_LIMIT, PIPELINE
+def save_file_wav(audio: ndarray) -> None:
+    """Save audio data to a timestamped WAV file under ``BASE_DIR/results``.
+    The directory is created on demand and the samples are written at a
+    fixed sample rate of 24,000 Hz.
+    :param audio: Data to save.
+    :return: None
+    :raise OSError: If an error occurs while saving the file.
+    """
+    # Anchor the directory to BASE_DIR so the directory that gets created is the
+    # one the file is written to, whatever the current working directory is.
+    results_dir = f"{BASE_DIR}/results"
+    makedirs(name=results_dir, exist_ok=True)
+    filename = f"{results_dir}/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.wav"
+    try:
+        logger.info(f"Saving audio to {filename}")
+        write(filename, audio, 24000)
+    except OSError as e:
+        raise OSError(f"Failed to save audio to {filename}: {e}") from e
+def generate_audio_for_text(
+    text: str, voice="af_heart", speed=1, save_file: bool = False
+) -> tuple[int, ndarray]:
+    """Turn the input text into speech and return the first generated segment.
+    The text is stripped and cut to CHAR_LIMIT characters (when a limit is
+    set) before it is handed to the pipeline. Only the first item the
+    pipeline yields is converted and returned.
+    :param text: Input text to convert to speech
+    :param voice: Voice identifier
+    :param speed: Speech speed multiplier
+    :param save_file: Whether to also write the audio to disk.
+    :return: Tuple containing the audio sample rate and raw audio data.
+    :raise Error: If the pipeline raises a Gradio error.
+    :raise RuntimeError: If the pipeline yields no audio.
+    """
+    if CHAR_LIMIT is not None:
+        text = text.strip()[:CHAR_LIMIT]
+    try:
+        for _first, _second, segment in PIPELINE(text, voice, speed):
+            samples = Tensor(segment).numpy()
+            if save_file:
+                save_file_wav(samples)
+            # Return on the very first segment; later ones are not consumed.
+            return 24000, samples
+    except Error as err:
+        raise Error(str(err)) from err
+    raise RuntimeError("No audio generated")

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff