jameszokah committed
Commit 63f90ce · Parent: 74c62a2

Refactor Docker setup: update docker-compose.yml to define app and db services, adjust ports, and configure environment variables; modify Dockerfile to use a Python base image, install the necessary dependencies, and set up the application structure.

Dockerfile CHANGED
@@ -17,85 +17,73 @@ RUN if [ -n "$HF_TOKEN" ]; then \
     else echo "No HF_TOKEN provided, model download will be skipped"; fi
 
 # Now for the main application stage
-FROM nvidia/cuda:12.4.0-base-ubuntu22.04
-
-# Set environment variables
-ENV PYTHONFAULTHANDLER=1 \
-    PYTHONUNBUFFERED=1 \
-    PYTHONHASHSEED=random \
-    PIP_NO_CACHE_DIR=1 \
-    PIP_DISABLE_PIP_VERSION_CHECK=1 \
-    PIP_DEFAULT_TIMEOUT=100 \
-    NVIDIA_VISIBLE_DEVICES=all \
-    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
-    TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6" \
-    TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+FROM python:3.10-slim
 
 # Install system dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    python3 \
-    python3-pip \
-    python3-dev \
+RUN apt-get update && apt-get install -y \
+    build-essential \
     ffmpeg \
     git \
-    build-essential \
-    sudo \
-    && apt-get clean \
+    netcat-openbsd \
     && rm -rf /var/lib/apt/lists/*
 
-# Create user and give sudo access
-RUN useradd -m -s /bin/bash user && \
-    usermod -aG sudo user && \
-    echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
-
 # Set working directory
 WORKDIR /app
 
-# Create and set up persistent directories with proper permissions
-RUN mkdir -p /app/static /app/models /app/voice_memories /app/voice_references \
-    /app/voice_profiles /app/cloned_voices /app/audio_cache /app/tokenizers /app/logs && \
-    chown -R user:user /app && \
-    chmod -R 755 /app && \
-    chmod -R 777 /app/voice_references /app/voice_profiles /app/voice_memories \
-    /app/cloned_voices /app/audio_cache /app/static /app/logs /app/tokenizers /app/models
-
-# Copy requirements first for better caching
-COPY --chown=user:user requirements.txt .
-
-# Switch to user
-USER user
-
 # Install Python dependencies
-RUN pip3 install --no-cache-dir --upgrade pip && \
-    pip3 install torch torchaudio numpy
-
-# Install torchao from source
-RUN pip3 install git+https://github.com/pytorch/ao.git
-
-# Install torchtune from source with specific branch for latest features
-RUN git clone https://github.com/pytorch/torchtune.git /tmp/torchtune && \
-    cd /tmp/torchtune && \
-    # Try to use the main branch, which should have llama3_2
-    git checkout main && \
-    pip install -e .
-
-# Install remaining dependencies
-RUN pip3 install -r requirements.txt
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install additional dependencies for database and storage
+RUN pip install --no-cache-dir \
+    sqlalchemy==2.0.27 \
+    aiofiles==23.2.1 \
+    psycopg2-binary==2.9.9 \
+    alembic==1.13.1
+
+# Create necessary directories
+RUN mkdir -p /app/storage/audio \
+    /app/storage/text \
+    /app/models \
+    /app/tokenizers \
+    /app/voice_memories \
+    /app/voice_references \
+    /app/voice_profiles \
+    /app/cloned_voices \
+    /app/audio_cache \
+    /app/static \
+    /app/logs \
+    /app/migrations/versions
+
+# Copy the model from the model-downloader stage
+COPY --from=model-downloader /model-downloader/models /app/models
+
+# Copy application code
+COPY . .
+
+# Copy and set up entrypoint script
+COPY docker-entrypoint.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+
+# Create volume mount points
+VOLUME ["/app/storage", "/app/models", "/app/logs"]
 
-# Install additional dependencies for streaming and voice cloning
-RUN pip3 install yt-dlp openai-whisper
-
-# Copy static files and application code
-COPY --chown=user:user ./static /app/static
-COPY --chown=user:user ./app /app/app
+# Set environment variables
+ENV PYTHONPATH=/app \
+    DATABASE_URL=sqlite:///app/storage/audiobooks.db \
+    STORAGE_PATH=/app/storage
 
-# Copy downloaded model from the model-downloader stage
-COPY --chown=user:user --from=model-downloader /model-downloader/models /app/models
+# Set permissions for all directories
+RUN chown -R nobody:nogroup /app && \
+    chmod -R 755 /app && \
+    # Make migrations directory writable
+    chmod -R 777 /app/migrations
 
-# Show available models in torchtune
-RUN python3 -c "import torchtune.models; print('Available models in torchtune:', dir(torchtune.models))"
+# Switch to non-root user
+USER nobody
 
 # Expose port
 EXPOSE 7860
 
-# Command to run the application
-CMD ["python3", "-m", "app.main"]
+# Set the entrypoint
+ENTRYPOINT ["docker-entrypoint.sh"]
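Because the entrypoint script is copied into /usr/local/bin (on PATH) and marked executable, the bare ENTRYPOINT ["docker-entrypoint.sh"] resolves correctly. A minimal sketch of building and running the image outside Compose — the audiobooks-app tag is a placeholder, and HF_TOKEN only matters if the model-downloader stage should fetch the model:

docker build --build-arg HF_TOKEN="$HF_TOKEN" -t audiobooks-app .
docker run --rm -p 7860:7860 audiobooks-app   # falls back to the SQLite DATABASE_URL baked into the image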
alembic.ini ADDED
@@ -0,0 +1,82 @@
+[alembic]
+# path to migration scripts
+script_location = migrations
+
+# template used to generate migration files
+file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d%%(second).2d_%%(slug)s
+
+# timezone to use when rendering the date
+# within the migration file as well as the filename.
+# string value is passed to dateutil.tz.gettz()
+# leave blank for localtime
+timezone = UTC
+
+# max length of characters to apply to the
+# "slug" field
+truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+sourceless = false
+
+# version location specification; this defaults
+# to migrations/versions. When using multiple version
+# directories, initial revisions must be specified with --version-path
+version_locations = %(here)s/migrations/versions
+
+# the output encoding used when revision files
+# are written from script.py.mako
+output_encoding = utf-8
+
+sqlalchemy.url = driver://user:pass@localhost/dbname
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
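Note that sqlalchemy.url here is only a placeholder: at runtime it is overwritten by get_url() in migrations/env.py (shown below), so the real connection string always comes from DATABASE_URL. As a rough illustration of the file_template above, creating a revision names the file by UTC timestamp plus a slug of the message (the resulting filename is illustrative, not from the commit):

alembic revision -m "add chapters table"
# -> migrations/versions/20240319_100000_add_chapters_table.py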
docker-compose.yml CHANGED
@@ -1,23 +1,35 @@
+version: '3.8'
+
 services:
-  csm-api:
-    build:
-      context: .
-      dockerfile: Dockerfile
-      args:
-        - HF_TOKEN=${HF_TOKEN}
+  app:
+    build: .
     ports:
-      - "8000:8000"
+      - "7860:7860"
+    environment:
+      - DATABASE_URL=postgresql://postgres:postgres@db:5432/audiobooks
+      - STORAGE_PATH=/app/storage
+      - PYTHONPATH=/app
+      - LOG_LEVEL=INFO
     volumes:
-      - ./models:/app/models
-      - ./cloned_voices:/app/cloned_voices
-      - ./voice_references:/app/voice_references
-      - ./voice_profiles:/app/voice_profiles
+      - app_storage:/app/storage
+      - app_models:/app/models
+      - app_logs:/app/logs
+    depends_on:
+      - db
+
+  db:
+    image: postgres:15-alpine
     environment:
-      - HF_TOKEN=${HF_TOKEN}
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=postgres
+      - POSTGRES_DB=audiobooks
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+
+volumes:
+  app_storage:
+  app_models:
+  app_logs:
+  postgres_data:
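With this file, a typical local workflow looks like the sketch below: Compose builds the app image, starts Postgres, and the entrypoint waits for the db service before migrating and launching.

docker compose up --build        # build the app image and start both services
docker compose logs -f app       # follow application logs
docker compose exec db psql -U postgres -d audiobooks   # inspect the database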
docker-entrypoint.sh ADDED
@@ -0,0 +1,19 @@
+#!/bin/sh
+set -e
+
+# Wait for database to be ready (if using PostgreSQL)
+if [ "$DATABASE_URL" != "${DATABASE_URL#postgresql://}" ]; then
+    echo "Waiting for PostgreSQL to be ready..."
+    while ! nc -z db 5432; do
+        sleep 0.1
+    done
+    echo "PostgreSQL is ready"
+fi
+
+# Run database migrations
+echo "Running database migrations..."
+alembic upgrade head
+
+# Start the application
+echo "Starting the application..."
+exec python -m app.main
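One caveat: the nc -z db 5432 loop assumes the database host is literally named db (as in docker-compose.yml) and retries forever. A bounded variant, shown here as a sketch rather than part of the commit, fails the container after roughly 30 seconds instead of hanging:

i=0
while ! nc -z db 5432; do
    i=$((i + 1))
    if [ "$i" -ge 300 ]; then
        # 300 retries at 0.1 s each ~= 30 seconds
        echo "Timed out waiting for PostgreSQL" >&2
        exit 1
    fi
    sleep 0.1
done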
migrations/env.py ADDED
@@ -0,0 +1,63 @@
+import os
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+
+# Import your models here
+from app.models.database import Base
+
+# this is the Alembic Config object
+config = context.config
+
+# Interpret the config file for Python logging
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+target_metadata = Base.metadata
+
+def get_url():
+    """Get database URL from environment variable."""
+    return os.getenv(
+        "DATABASE_URL",
+        "sqlite:///app/storage/audiobooks.db"
+    )
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode."""
+    url = get_url()
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode."""
+    configuration = config.get_section(config.config_ini_section)
+    configuration["sqlalchemy.url"] = get_url()
+    connectable = engine_from_config(
+        configuration,
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
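The two code paths map directly onto Alembic's command-line modes: a plain upgrade connects and executes DDL (online), while --sql only renders the statements (offline), which is useful for reviewing what a migration will do before running it:

alembic upgrade head           # online mode: run_migrations_online()
alembic upgrade head --sql     # offline mode: prints the DDL without executing it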
migrations/versions/initial_migration.py ADDED
@@ -0,0 +1,76 @@
+"""Initial migration
+
+Revision ID: 001
+Revises:
+Create Date: 2024-03-19 10:00:00.000000
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+import enum
+
+# revision identifiers, used by Alembic.
+revision = '001'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+class AudiobookStatus(enum.Enum):
+    """Status of an audiobook."""
+    PENDING = "pending"
+    PROCESSING = "processing"
+    COMPLETED = "completed"
+    FAILED = "failed"
+
+def upgrade() -> None:
+    # Create enum type for PostgreSQL
+    if op.get_bind().dialect.name == 'postgresql':
+        op.execute('CREATE TYPE audiobookstatus AS ENUM (\'pending\', \'processing\', \'completed\', \'failed\')')
+        status_type = postgresql.ENUM('pending', 'processing', 'completed', 'failed', name='audiobookstatus')
+    else:
+        status_type = sa.String(20)
+
+    # Create audiobooks table
+    op.create_table(
+        'audiobooks',
+        sa.Column('id', sa.String(36), primary_key=True),
+        sa.Column('title', sa.String(255), nullable=False),
+        sa.Column('author', sa.String(255), nullable=False),
+        sa.Column('voice_id', sa.Integer, nullable=False),
+        sa.Column('status', status_type, nullable=False),
+        sa.Column('created_at', sa.DateTime, nullable=False),
+        sa.Column('updated_at', sa.DateTime, nullable=False),
+        sa.Column('text_content', sa.Text, nullable=True),
+        sa.Column('text_file_path', sa.String(255), nullable=True),
+        sa.Column('audio_file_path', sa.String(255), nullable=True),
+        sa.Column('error_message', sa.Text, nullable=True),
+    )
+
+    # Create audiobook_chunks table
+    op.create_table(
+        'audiobook_chunks',
+        sa.Column('id', sa.Integer, primary_key=True),
+        sa.Column('audiobook_id', sa.String(36), sa.ForeignKey('audiobooks.id'), nullable=False),
+        sa.Column('chunk_number', sa.Integer, nullable=False),
+        sa.Column('text_content', sa.Text, nullable=False),
+        sa.Column('audio_file_path', sa.String(255), nullable=True),
+        sa.Column('status', status_type, nullable=False),
+        sa.Column('created_at', sa.DateTime, nullable=False),
+        sa.Column('updated_at', sa.DateTime, nullable=False),
+    )
+
+    # Create indexes
+    op.create_index('ix_audiobooks_created_at', 'audiobooks', ['created_at'])
+    op.create_index('ix_audiobooks_status', 'audiobooks', ['status'])
+    op.create_index('ix_audiobook_chunks_audiobook_id', 'audiobook_chunks', ['audiobook_id'])
+    op.create_index('ix_audiobook_chunks_chunk_number', 'audiobook_chunks', ['chunk_number'])
+
+def downgrade() -> None:
+    # Drop tables
+    op.drop_table('audiobook_chunks')
+    op.drop_table('audiobooks')
+
+    # Drop enum type if using PostgreSQL
+    if op.get_bind().dialect.name == 'postgresql':
+        op.execute('DROP TYPE audiobookstatus')
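Because revision = '001' and down_revision = None, this file is the root of the migration history; applying and rolling it back can be exercised directly (the commands below are standard Alembic usage, not part of the commit):

alembic upgrade 001       # create both tables, the indexes, and (on Postgres) the enum type
alembic downgrade base    # run downgrade(): drop the tables and the enum type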