MH0386 committed
Commit 0a1c297 · verified · Parent(s): ed702c2

Upload folder using huggingface_hub

.github/actions/uv/action.yml ADDED
@@ -0,0 +1,16 @@
+name: 'Setup UV'
+description: 'Reusable set of steps to Setup UV'
+runs:
+  using: "composite"
+  steps:
+    - name: Install uv
+      uses: astral-sh/setup-uv@v6
+      id: uv
+      with:
+        enable-cache: true
+        cache-dependency-glob: "uv.lock"
+        activate-environment: true
+    - name: Install the project
+      shell: bash
+      if: steps.uv.outputs.cache-hit != 'true'
+      run: uv sync --frozen
.github/dependabot.yml ADDED
@@ -0,0 +1,19 @@
+# To get started with Dependabot version updates, you'll need to specify which
+# package ecosystems to update and where the package manifests are located.
+# Please see the documentation for all configuration options:
+# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file
+
+# version: 2
+# updates:
+#   - package-ecosystem: "docker"
+#     directory: "/"
+#     schedule:
+#       interval: "weekly"
+#   - package-ecosystem: "github-actions"
+#     directory: "/"
+#     schedule:
+#       interval: "weekly"
+#   - package-ecosystem: "uv"
+#     directory: "/"
+#     schedule:
+#       interval: "weekly"
.github/hadolint.yml ADDED
@@ -0,0 +1,6 @@
+ignored:
+  - DL3008
+
+trustedRegistries:
+  - docker.io
+  - ghcr.io
.github/mergify.yml ADDED
@@ -0,0 +1,18 @@
+pull_request_rules:
+  - name: Automatically merge Renovate PRs
+    conditions:
+      - author = renovate[bot]
+      - check-success = spelling
+      - check-success = ruff
+      - check-success = pyright
+      - check-success = pylint
+    actions:
+      queue:
+      label:
+        add:
+          - dependencies
+queue_rules:
+  - queue_branch_merge_method: fast-forward
+    allow_queue_branch_edit: true
+    update_method: merge
+    name: default
.github/workflows/code_analysis.yml ADDED
@@ -0,0 +1,51 @@
+name: Code Analysis
+on:
+  push:
+  pull_request:
+permissions:
+  contents: read
+jobs:
+  spelling:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v4
+      - name: Setup UV
+        uses: ./.github/actions/uv
+      - name: Run typos
+        run: uv run typos . --config ./pyproject.toml
+  ruff:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v4
+      - name: Setup UV
+        uses: ./.github/actions/uv
+      - name: Run Ruff
+        run: uv run ruff check .
+  pyright:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v4
+      - name: Setup UV
+        uses: ./.github/actions/uv
+      - name: Run Pyright
+        run: uv run pyright
+  pylint:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v4
+      - name: Setup UV
+        uses: ./.github/actions/uv
+      - name: Analysing the code with Pylint
+        run: uv run pylint $(git ls-files '*.py') --output-format=github --rcfile pyproject.toml
+  dockerfile:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Actions Repository
+        uses: actions/checkout@v4
+      - uses: hadolint/[email protected]
+        with:
+          config: ./.github/hadolint.yml
.github/workflows/docker.yml ADDED
@@ -0,0 +1,71 @@
+name: Docker Images
+on:
+  workflow_dispatch:
+  pull_request:
+  push:
+permissions:
+  contents: read
+  packages: write
+  attestations: write
+  id-token: write
+jobs:
+  check_image:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.TOKEN_KEY_GITHUB }}
+      - name: Validate build configuration
+        uses: docker/build-push-action@v6
+        with:
+          call: check
+  build_image:
+    needs: check_image
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
+        with:
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.TOKEN_KEY_GITHUB }}
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+      - name: Docker meta
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ghcr.io/${{ github.repository }}
+          tags: type=raw,value=latest,enable={{is_default_branch}}
+      - name: Build
+        uses: docker/build-push-action@v6
+        with:
+          cache-from: type=gha
+          cache-to: type=gha,mode=max
+          push: true
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          annotations: ${{ steps.meta.outputs.annotations }}
+  docker_scout:
+    needs:
+      - build_image
+      - check_image
+    runs-on: ubuntu-latest
+    steps:
+      - name: Docker Scout
+        continue-on-error: true
+        uses: docker/scout-action@v1
+        with:
+          command: quickview,cves,recommendations
+          dockerhub-user: mh0386
+          dockerhub-password: ${{ secrets.TOKEN_KEY_DOCKER }}
+          image: ghcr.io/${{ github.repository }}:latest
.github/workflows/github.yaml ADDED
@@ -0,0 +1,76 @@
+name: GitHub Release
+on:
+  workflow_dispatch:
+  push:
+    tags:
+      - "[0-9]+.[0-9]+.[0-9]+"
+      - "[0-9]+.[0-9]+.[0-9]+a[0-9]+"
+      - "[0-9]+.[0-9]+.[0-9]+b[0-9]+"
+      - "[0-9]+.[0-9]+.[0-9]+rc[0-9]+"
+env:
+  PACKAGE_NAME: vocalizr
+  OWNER: AlphaSphereDotAI
+permissions:
+  contents: write
+jobs:
+  details:
+    runs-on: ubuntu-latest
+    outputs:
+      new_version: ${{ steps.release.outputs.new_version }}
+      tag_name: ${{ steps.release.outputs.tag_name }}
+    steps:
+      - name: Checkout Code
+        uses: actions/[email protected]
+      - name: Extract tag and Details
+        id: release
+        run: |
+          if [ "${{ github.ref_type }}" = "tag" ]; then
+            TAG_NAME=${GITHUB_REF#refs/tags/}
+            NEW_VERSION=$(echo $TAG_NAME | awk -F'-' '{print $1}')
+            SUFFIX=$(echo $TAG_NAME | grep -oP '[a-z]+[0-9]+' || echo "")
+            echo "new_version=$NEW_VERSION" >> "$GITHUB_OUTPUT"
+            echo "suffix=$SUFFIX" >> "$GITHUB_OUTPUT"
+            echo "tag_name=$TAG_NAME" >> "$GITHUB_OUTPUT"
+            echo "Version is $NEW_VERSION"
+            echo "Suffix is $SUFFIX"
+            echo "Tag name is $TAG_NAME"
+          else
+            echo "No tag found"
+            exit 1
+          fi
+  setup_and_build:
+    needs: [details]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/[email protected]
+      - name: Setup UV
+        uses: ./.github/actions/uv
+      - name: Update Project Version
+        run: uv version ${{ needs.details.outputs.new_version }}
+      - name: Build source and wheel distribution
+        run: uv build --all-packages
+      - name: Upload artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: dist
+          path: dist/
+  github_release:
+    name: Create GitHub Release
+    needs: [setup_and_build, details]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout Code
+        uses: actions/[email protected]
+        with:
+          fetch-depth: 0
+      - name: Download artifacts
+        uses: actions/download-artifact@v4
+        with:
+          name: dist
+          path: dist/
+      - name: Create GitHub Release
+        id: create_release
+        env:
+          GH_TOKEN: ${{ github.token }}
+        run: gh release create ${{ needs.details.outputs.tag_name }} dist/* --title ${{ needs.details.outputs.tag_name }} --generate-notes
.github/workflows/huggingface.yml ADDED
@@ -0,0 +1,30 @@
+name: Push to HuggingFace
+on:
+  push:
+env:
+  HF_HUB_ENABLE_HF_TRANSFER: 1
+jobs:
+  huggingface:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Setup UV
+        uses: ./.github/actions/uv
+      - name: Remove Unnecessary Files
+        run: |
+          echo ".github" >> .gitignore
+          echo ".idea" >> .gitignore
+          echo "docker-bake.hcl" >> .gitignore
+          echo "renovate.json" >> .gitignore
+          echo ".deepsource.toml" >> .gitignore
+      - name: Add to git credentials
+        run: git config --global credential.helper store
+      - name: Login to HuggingFace Hub
+        run: uv run huggingface-cli login --add-to-git-credential --token ${{ secrets.HF_TOKEN }}
+      - name: Check if logged in
+        run: uv run huggingface-cli whoami
+      - name: Upload
+        run: uv run huggingface-cli upload AlphaSphereDotAI/Vocalizr . . --repo-type space
+      # - name: Minimize uv cache
+      #   run: uv cache prune --ci
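
The upload step above shells out to `huggingface-cli`; the commit message ("Upload folder using huggingface_hub") describes the same operation. A minimal Python sketch of the equivalent call through the `huggingface_hub` API, assuming a valid `HF_TOKEN` in the environment (the ignore patterns mirror the files appended to `.gitignore` in the workflow, and are an assumption, not part of this commit):

```python
# Hedged sketch: upload the working tree to the Space via huggingface_hub's Python API
# instead of the CLI used in the workflow above.
from os import environ

from huggingface_hub import HfApi

api = HfApi(token=environ["HF_TOKEN"])
api.upload_folder(
    folder_path=".",                        # same as "huggingface-cli upload ... . ."
    repo_id="AlphaSphereDotAI/Vocalizr",
    repo_type="space",
    ignore_patterns=[".github/*", ".idea/*", "docker-bake.hcl", "renovate.json", ".deepsource.toml"],
)
```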
.gitignore CHANGED
@@ -4,7 +4,7 @@ tmp/
 .mypy_cache/
 **/__pycache__/
 .env
-.github
+results/.github
 .idea
 docker-bake.hcl
 renovate.json
src/vocalizr/__init__.py CHANGED
@@ -10,7 +10,7 @@ load_dotenv()
 
 BASE_DIR: Path = Path(__file__).parent.parent.parent
 DEBUG: bool = getenv(key="DEBUG", default="False").lower() == "true"
-CHAR_LIMIT: int = int(getenv(key="CHAR_LIMIT", default="5000"))
+CHAR_LIMIT: int = int(getenv(key="CHAR_LIMIT", default="-1"))
 SERVER_NAME: str = getenv(key="GRADIO_SERVER_NAME", default="localhost")
 SERVER_PORT: int = int(getenv(key="GRADIO_SERVER_PORT", default="8080"))
 PIPELINE: KPipeline = KPipeline(lang_code="a", repo_id="hexgrad/Kokoro-82M")
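
The new `-1` default only makes sense together with the guard added in `model.py` further down; a minimal sketch of the intended behaviour (the `truncate` helper is illustrative, not part of the repository):

```python
# Sketch: CHAR_LIMIT of -1 means "no limit"; any other value truncates the input text.
from os import getenv

CHAR_LIMIT: int = int(getenv(key="CHAR_LIMIT", default="-1"))


def truncate(text: str) -> str:
    # Mirrors the guard used in generate_audio_for_text below.
    return text if CHAR_LIMIT == -1 else text.strip()[:CHAR_LIMIT]
```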
src/vocalizr/__main__.py CHANGED
@@ -7,7 +7,7 @@ from vocalizr.gui import app_block
 def main() -> None:
     """Launch the Gradio voice generation web application."""
     app: Blocks = app_block()
-    app.launch(
+    app.queue(api_open=True).launch(
         server_name=SERVER_NAME,
         server_port=SERVER_PORT,
         debug=DEBUG,
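
Opening the queue with `api_open=True` exposes the click event as a callable API on the running app. A hedged usage sketch with `gradio_client`; the endpoint name `/generate_audio_for_text` is an assumption based on the function wired to the Generate button, not something this commit confirms:

```python
# Sketch: call the running app programmatically once the queue's API is open.
from gradio_client import Client

client = Client("http://localhost:8080")  # SERVER_NAME/SERVER_PORT defaults from __init__.py
result = client.predict(
    "Hello from Vocalizr",               # text
    "af_heart",                          # voice
    1.0,                                 # speed
    False,                               # save_file
    api_name="/generate_audio_for_text",  # assumed endpoint name
)
print(result)
```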
src/vocalizr/gui.py CHANGED
@@ -10,7 +10,7 @@ from gradio import (
     Textbox,
 )
 
-from vocalizr import CHAR_LIMIT, CHOICES, CUDA_AVAILABLE
+from vocalizr import CHOICES, CUDA_AVAILABLE
 from vocalizr.model import generate_audio_for_text
 
 
@@ -24,13 +24,7 @@ def app_block() -> Blocks:
         with Column():
             text: Textbox = Textbox(
                 label="Input Text",
-                info=(
-                    f"""
-                    Up to ~500 characters per Generate,
-                    or {"∞" if CHAR_LIMIT is None else CHAR_LIMIT}
-                    characters per Stream
-                    """
-                ),
+                info=("""Enter your text here"""),
             )
             with Row():
                 voice: Dropdown = Dropdown(
@@ -47,7 +41,8 @@
                     interactive=CUDA_AVAILABLE,
                 )
                 save_file = Checkbox(
-                    label="Save Audio", info="Save audio to local storage"
+                    label="Save Audio",
+                    info="Save audio to local storage",
                 )
             speed: Slider = Slider(
                 minimum=0.5,
@@ -60,13 +55,25 @@
            out_audio: Audio = Audio(
                label="Output Audio",
                interactive=False,
-               streaming=False,
+               streaming=True,
                autoplay=True,
            )
-           generate_btn: Button = Button("Generate", variant="primary")
-           generate_btn.click(
+           with Row():
+               stream_btn: Button = Button(
+                   value="Generate",
+                   variant="primary",
+               )
+               stop_btn: Button = Button(
+                   value="Stop",
+                   variant="stop",
+               )
+           stream_event = stream_btn.click(
               fn=generate_audio_for_text,
               inputs=[text, voice, speed, save_file],
               outputs=[out_audio],
           )
+           stop_btn.click(
+               fn=None,
+               cancels=stream_event,
+           )
    return app
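
The new wiring replaces the single Generate button with a stream/stop pair: the click event is stored and passed to `cancels=` so the Stop button can abort a running generation. A self-contained sketch of the same pattern, with a toy generator standing in for `generate_audio_for_text`:

```python
# Minimal sketch of the stream/stop pattern above (toy generator, not the Kokoro pipeline).
import numpy as np
import gradio as gr


def fake_stream(text: str):
    # Yield one second of silence per word, in the same (sample_rate, audio) shape
    # that generate_audio_for_text yields.
    for _ in text.split():
        yield 24000, np.zeros(24000, dtype=np.float32)


with gr.Blocks() as demo:
    text = gr.Textbox(label="Input Text")
    out_audio = gr.Audio(label="Output Audio", streaming=True, autoplay=True)
    with gr.Row():
        stream_btn = gr.Button("Generate", variant="primary")
        stop_btn = gr.Button("Stop", variant="stop")
    stream_event = stream_btn.click(fn=fake_stream, inputs=[text], outputs=[out_audio])
    stop_btn.click(fn=None, cancels=stream_event)

if __name__ == "__main__":
    demo.queue().launch()
```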
src/vocalizr/model.py CHANGED
@@ -1,55 +1,73 @@
-from datetime import datetime
-from os import makedirs
-
+from typing import Any, Generator, Literal
 from gradio import Error
+from kokoro import KPipeline
 from loguru import logger
-from numpy import ndarray
+from numpy import float32
+from numpy.typing import NDArray
 from soundfile import write
-from torch import Tensor
 
-from vocalizr import BASE_DIR, CHAR_LIMIT, PIPELINE
+from vocalizr import CHAR_LIMIT, PIPELINE, AUDIO_FILE_PATH
 
 
-def save_file_wav(audio: ndarray) -> None:
+@logger.catch
+def save_file_wav(audio: NDArray[float32]) -> None:
     """Save audio data to a WAV file in the 'results' directory.
-
     Creates a timestamped WAV file in the 'results' directory with
     the provided audio data at a fixed sample rate of 24,000 Hz.
 
-    :param audio: Data to save.
-    :return: None
-    :raise OSError: If an error occurs while saving the file.
+    Args:
+        audio (NDArray[float32]): raw audio data.
+
+    Raises:
+        RuntimeError: If there are problems with saving the audio file locally.
     """
-    makedirs(name="results", exist_ok=True)
-    filename: str = (
-        f"{BASE_DIR}/results/{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.wav"
-    )
     try:
-        logger.info(f"Saving audio to {filename}")
-        write(file=filename, data=audio, samplerate=24000)
-    except OSError as e:
-        raise OSError(f"Failed to save audio to {filename}: {e}") from e
+        logger.info(f"Saving audio to {AUDIO_FILE_PATH}")
+        write(file=AUDIO_FILE_PATH, data=audio, samplerate=24000)
+    except Exception as e:
+        logger.exception(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}")
+        raise RuntimeError(f"Failed to save audio to {AUDIO_FILE_PATH}: {e}") from e
 
 
+# noinspection PyTypeChecker
+@logger.catch
 def generate_audio_for_text(
-    text: str, voice: str = "af_heart", speed: float = 1, save_file: bool = False
-) -> tuple[int, ndarray]:
+    text: str,
+    voice: str = "af_heart",
+    speed: float = 1,
+    save_file: bool = False,
+) -> Generator[tuple[Literal[24000], NDArray[float32]], Any, None]:
     """Generate audio for the input text.
 
-    :param text: Input text to convert to speech
-    :param voice: Voice identifier
-    :param speed: Speech speed multiplier
-    :param save_file: If to save the audio file to disk.
-    :return: Tuple containing the audio sample rate and raw audio data.
-    :raise Error: If an error occurs during generation.
+    Args:
+        text (str): Input text to convert to speech
+        voice (str, optional): Voice identifier. Defaults to "af_heart".
+        speed (float, optional): Speech speed. Defaults to 1.
+        save_file (bool, optional): If to save the audio file to disk. Defaults to False.
+
+    Raises:
+        Error: If text (str) is empty
+        Error: If audio (NDArray[float32]) is str
+        Error: If audio (NDArray[float32]) is None
+
+    Yields:
+        Generator[tuple[Literal[24000], NDArray[float32]], Any, None]: Tuple containing the audio sample rate and raw audio data.
     """
-    text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
     try:
-        for _, _, audio in PIPELINE(text, voice, speed):
-            audio = Tensor(audio).numpy()
-            if save_file:
-                save_file_wav(audio=audio)
-            return 24000, audio
-    except Error as e:
-        raise Error(message=str(e)) from e
-    raise RuntimeError("No audio generated")
+        text = text if CHAR_LIMIT == -1 else text.strip()[:CHAR_LIMIT]
+    except Exception as e:
+        logger.exception(str(object=e))
+        raise Error(message=str(object=e)) from e
+    generator: Generator[KPipeline.Result, None, None] = PIPELINE(
+        text=text, voice=voice, speed=speed
+    )
+    logger.info(f"Generating audio for '{text}'")
+    for _, _, audio in generator:
+        if audio is None or isinstance(audio, str):
+            logger.exception(f"Unexpected type (audio): {type(audio)}")
+            raise Error(message=f"Unexpected type (audio): {type(audio)}")
+        audio_np: NDArray[float32] = audio.numpy()
+        if save_file:
+            save_file_wav(audio=audio_np)
+        logger.info(f"Yielding audio for '{text}'")
+        yield 24000, audio_np
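
Since `generate_audio_for_text` now returns a generator, callers outside Gradio consume it by iterating over the yielded chunks. A hedged sketch of local usage (the text and voice are arbitrary; it assumes the package is installed and the Kokoro weights are available):

```python
# Sketch: collect the streamed (sample_rate, audio) chunks and report the total duration.
import numpy as np

from vocalizr.model import generate_audio_for_text

chunks = [audio for _, audio in generate_audio_for_text("Hello from Vocalizr", voice="af_heart")]
full_audio = np.concatenate(chunks)
print(f"Generated {full_audio.shape[0] / 24000:.2f} seconds of audio")
```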