jameszokah committed
Commit 63f90ce · Parent: 74c62a2

Refactor Docker setup: update docker-compose.yml to define app and db services, adjust ports, and configure environment variables; modify Dockerfile to use a Python base image, install the necessary dependencies, and set up the application structure.

Dockerfile CHANGED
@@ -17,85 +17,73 @@ RUN if [ -n "$HF_TOKEN" ]; then \
     else echo "No HF_TOKEN provided, model download will be skipped"; fi
 
 # Now for the main application stage
-FROM nvidia/cuda:12.4.0-base-ubuntu22.04
-
-# Set environment variables
-ENV PYTHONFAULTHANDLER=1 \
-    PYTHONUNBUFFERED=1 \
-    PYTHONHASHSEED=random \
-    PIP_NO_CACHE_DIR=1 \
-    PIP_DISABLE_PIP_VERSION_CHECK=1 \
-    PIP_DEFAULT_TIMEOUT=100 \
-    NVIDIA_VISIBLE_DEVICES=all \
-    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
-    TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6" \
-    TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
+FROM python:3.10-slim
 
 # Install system dependencies
-RUN apt-get update && apt-get install -y --no-install-recommends \
-    python3 \
-    python3-pip \
-    python3-dev \
+RUN apt-get update && apt-get install -y \
+    build-essential \
     ffmpeg \
     git \
-    build-essential \
-    sudo \
-    && apt-get clean \
+    netcat-openbsd \
     && rm -rf /var/lib/apt/lists/*
 
-# Create user and give sudo access
-RUN useradd -m -s /bin/bash user && \
-    usermod -aG sudo user && \
-    echo "user ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
-
 # Set working directory
 WORKDIR /app
 
-# Create and set up persistent directories with proper permissions
-RUN mkdir -p /app/static /app/models /app/voice_memories /app/voice_references \
-    /app/voice_profiles /app/cloned_voices /app/audio_cache /app/tokenizers /app/logs && \
-    chown -R user:user /app && \
-    chmod -R 755 /app && \
-    chmod -R 777 /app/voice_references /app/voice_profiles /app/voice_memories \
-    /app/cloned_voices /app/audio_cache /app/static /app/logs /app/tokenizers /app/models
-
-# Copy requirements first for better caching
-COPY --chown=user:user requirements.txt .
-
-# Switch to user
-USER user
-
 # Install Python dependencies
-RUN pip3 install --no-cache-dir --upgrade pip && \
-    pip3 install torch torchaudio numpy
-
-# Install torchao from source
-RUN pip3 install git+https://github.com/pytorch/ao.git
-
-# Install torchtune from source with specific branch for latest features
-RUN git clone https://github.com/pytorch/torchtune.git /tmp/torchtune && \
-    cd /tmp/torchtune && \
-    # Try to use the main branch, which should have llama3_2
-    git checkout main && \
-    pip install -e .
-
-# Install remaining dependencies
-RUN pip3 install -r requirements.txt
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Install additional dependencies for database and storage
+RUN pip install --no-cache-dir \
+    sqlalchemy==2.0.27 \
+    aiofiles==23.2.1 \
+    psycopg2-binary==2.9.9 \
+    alembic==1.13.1
+
+# Create necessary directories
+RUN mkdir -p /app/storage/audio \
+    /app/storage/text \
+    /app/models \
+    /app/tokenizers \
+    /app/voice_memories \
+    /app/voice_references \
+    /app/voice_profiles \
+    /app/cloned_voices \
+    /app/audio_cache \
+    /app/static \
+    /app/logs \
+    /app/migrations/versions
+
+# Copy the model from the model-downloader stage
+COPY --from=model-downloader /model-downloader/models /app/models
+
+# Copy application code
+COPY . .
+
+# Copy and set up entrypoint script
+COPY docker-entrypoint.sh /usr/local/bin/
+RUN chmod +x /usr/local/bin/docker-entrypoint.sh
+
+# Create volume mount points
+VOLUME ["/app/storage", "/app/models", "/app/logs"]
 
-# Install additional dependencies for streaming and voice cloning
-RUN pip3 install yt-dlp openai-whisper
-
-# Copy static files and application code
-COPY --chown=user:user ./static /app/static
-COPY --chown=user:user ./app /app/app
+# Set environment variables
+ENV PYTHONPATH=/app \
+    DATABASE_URL=sqlite:///app/storage/audiobooks.db \
+    STORAGE_PATH=/app/storage
 
-# Copy downloaded model from the model-downloader stage
-COPY --chown=user:user --from=model-downloader /model-downloader/models /app/models
+# Set permissions for all directories
+RUN chown -R nobody:nogroup /app && \
+    chmod -R 755 /app && \
+    # Make migrations directory writable
+    chmod -R 777 /app/migrations
 
-# Show available models in torchtune
-RUN python3 -c "import torchtune.models; print('Available models in torchtune:', dir(torchtune.models))"
+# Switch to non-root user
+USER nobody
 
 # Expose port
 EXPOSE 7860
 
-# Command to run the application
-CMD ["python3", "-m", "app.main"]
+# Set the entrypoint
+ENTRYPOINT ["docker-entrypoint.sh"]
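Because the entrypoint script is copied into /usr/local/bin (on PATH) and marked executable, the bare ENTRYPOINT ["docker-entrypoint.sh"] resolves correctly. A minimal sketch of building and running the image outside Compose — the audiobooks-app tag is a placeholder, and HF_TOKEN only matters if the model-downloader stage should fetch the model:

docker build --build-arg HF_TOKEN="$HF_TOKEN" -t audiobooks-app .
docker run --rm -p 7860:7860 audiobooks-app   # falls back to the SQLite DATABASE_URL baked into the image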
alembic.ini ADDED
@@ -0,0 +1,82 @@
+[alembic]
+# path to migration scripts
+script_location = migrations
+
+# template used to generate migration files
+file_template = %%(year)d%%(month).2d%%(day).2d_%%(hour).2d%%(minute).2d%%(second).2d_%%(slug)s
+
+# timezone to use when rendering the date
+# within the migration file as well as the filename.
+# string value is passed to dateutil.tz.gettz()
+# leave blank for localtime
+timezone = UTC
+
+# max length of characters to apply to the
+# "slug" field
+truncate_slug_length = 40
+
+# set to 'true' to run the environment during
+# the 'revision' command, regardless of autogenerate
+revision_environment = false
+
+# set to 'true' to allow .pyc and .pyo files without
+# a source .py file to be detected as revisions in the
+# versions/ directory
+sourceless = false
+
+# version location specification; this defaults
+# to migrations/versions. When using multiple version
+# directories, initial revisions must be specified with --version-path
+version_locations = %(here)s/migrations/versions
+
+# the output encoding used when revision files
+# are written from script.py.mako
+output_encoding = utf-8
+
+sqlalchemy.url = driver://user:pass@localhost/dbname
+
+[post_write_hooks]
+# post_write_hooks defines scripts or Python functions that are run
+# on newly generated revision scripts. See the documentation for further
+# detail and examples
+
+# format using "black" - use the console_scripts runner, against the "black" entrypoint
+# hooks = black
+# black.type = console_scripts
+# black.entrypoint = black
+# black.options = -l 79 REVISION_SCRIPT_FILENAME
+
+# Logging configuration
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+qualname =
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
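Note that sqlalchemy.url here is only a placeholder: at runtime it is overwritten by get_url() in migrations/env.py (shown below), so the real connection string always comes from DATABASE_URL. As a rough illustration of the file_template above, creating a revision names the file by UTC timestamp plus a slug of the message (the resulting filename is illustrative, not from the commit):

alembic revision -m "add chapters table"
# -> migrations/versions/20240319_100000_add_chapters_table.py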
docker-compose.yml CHANGED
@@ -1,23 +1,35 @@
+version: '3.8'
+
 services:
-  csm-api:
-    build:
-      context: .
-      dockerfile: Dockerfile
-      args:
-        - HF_TOKEN=${HF_TOKEN}
+  app:
+    build: .
     ports:
-      - "8000:8000"
+      - "7860:7860"
+    environment:
+      - DATABASE_URL=postgresql://postgres:postgres@db:5432/audiobooks
+      - STORAGE_PATH=/app/storage
+      - PYTHONPATH=/app
+      - LOG_LEVEL=INFO
     volumes:
-      - ./models:/app/models
-      - ./cloned_voices:/app/cloned_voices
-      - ./voice_references:/app/voice_references
-      - ./voice_profiles:/app/voice_profiles
+      - app_storage:/app/storage
+      - app_models:/app/models
+      - app_logs:/app/logs
+    depends_on:
+      - db
+
+  db:
+    image: postgres:15-alpine
     environment:
-      - HF_TOKEN=${HF_TOKEN}
-    deploy:
-      resources:
-        reservations:
-          devices:
-            - driver: nvidia
-              count: all
-              capabilities: [gpu]
+      - POSTGRES_USER=postgres
+      - POSTGRES_PASSWORD=postgres
+      - POSTGRES_DB=audiobooks
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    ports:
+      - "5432:5432"
+
+volumes:
+  app_storage:
+  app_models:
+  app_logs:
+  postgres_data:
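With this file, a typical local workflow looks like the sketch below: Compose builds the app image, starts Postgres, and the entrypoint waits for the db service before migrating and launching.

docker compose up --build        # build the app image and start both services
docker compose logs -f app       # follow application logs
docker compose exec db psql -U postgres -d audiobooks   # inspect the database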
docker-entrypoint.sh ADDED
@@ -0,0 +1,19 @@
+#!/bin/sh
+set -e
+
+# Wait for database to be ready (if using PostgreSQL)
+if [ "$DATABASE_URL" != "${DATABASE_URL#postgresql://}" ]; then
+    echo "Waiting for PostgreSQL to be ready..."
+    while ! nc -z db 5432; do
+        sleep 0.1
+    done
+    echo "PostgreSQL is ready"
+fi
+
+# Run database migrations
+echo "Running database migrations..."
+alembic upgrade head
+
+# Start the application
+echo "Starting the application..."
+exec python -m app.main
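One caveat: the nc -z db 5432 loop assumes the database host is literally named db (as in docker-compose.yml) and retries forever. A bounded variant, shown here as a sketch rather than part of the commit, fails the container after roughly 30 seconds instead of hanging:

i=0
while ! nc -z db 5432; do
    i=$((i + 1))
    if [ "$i" -ge 300 ]; then
        # 300 retries at 0.1 s each ~= 30 seconds
        echo "Timed out waiting for PostgreSQL" >&2
        exit 1
    fi
    sleep 0.1
done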
migrations/env.py ADDED
@@ -0,0 +1,63 @@
+import os
+from logging.config import fileConfig
+
+from sqlalchemy import engine_from_config
+from sqlalchemy import pool
+
+from alembic import context
+
+# Import your models here
+from app.models.database import Base
+
+# this is the Alembic Config object
+config = context.config
+
+# Interpret the config file for Python logging
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+target_metadata = Base.metadata
+
+def get_url():
+    """Get database URL from environment variable."""
+    return os.getenv(
+        "DATABASE_URL",
+        "sqlite:///app/storage/audiobooks.db"
+    )
+
+def run_migrations_offline() -> None:
+    """Run migrations in 'offline' mode."""
+    url = get_url()
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        dialect_opts={"paramstyle": "named"},
+    )
+
+    with context.begin_transaction():
+        context.run_migrations()
+
+def run_migrations_online() -> None:
+    """Run migrations in 'online' mode."""
+    configuration = config.get_section(config.config_ini_section)
+    configuration["sqlalchemy.url"] = get_url()
+    connectable = engine_from_config(
+        configuration,
+        prefix="sqlalchemy.",
+        poolclass=pool.NullPool,
+    )
+
+    with connectable.connect() as connection:
+        context.configure(
+            connection=connection,
+            target_metadata=target_metadata
+        )
+
+        with context.begin_transaction():
+            context.run_migrations()
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    run_migrations_online()
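The two code paths map directly onto Alembic's command-line modes: a plain upgrade connects and executes DDL (online), while --sql only renders the statements (offline), which is useful for reviewing what a migration will do before running it:

alembic upgrade head           # online mode: run_migrations_online()
alembic upgrade head --sql     # offline mode: prints the DDL without executing it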
migrations/versions/initial_migration.py ADDED
@@ -0,0 +1,76 @@
+"""Initial migration
+
+Revision ID: 001
+Revises:
+Create Date: 2024-03-19 10:00:00.000000
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+import enum
+
+# revision identifiers, used by Alembic.
+revision = '001'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+class AudiobookStatus(enum.Enum):
+    """Status of an audiobook."""
+    PENDING = "pending"
+    PROCESSING = "processing"
+    COMPLETED = "completed"
+    FAILED = "failed"
+
+def upgrade() -> None:
+    # Create enum type for PostgreSQL
+    if op.get_bind().dialect.name == 'postgresql':
+        op.execute('CREATE TYPE audiobookstatus AS ENUM (\'pending\', \'processing\', \'completed\', \'failed\')')
+        status_type = postgresql.ENUM('pending', 'processing', 'completed', 'failed', name='audiobookstatus')
+    else:
+        status_type = sa.String(20)
+
+    # Create audiobooks table
+    op.create_table(
+        'audiobooks',
+        sa.Column('id', sa.String(36), primary_key=True),
+        sa.Column('title', sa.String(255), nullable=False),
+        sa.Column('author', sa.String(255), nullable=False),
+        sa.Column('voice_id', sa.Integer, nullable=False),
+        sa.Column('status', status_type, nullable=False),
+        sa.Column('created_at', sa.DateTime, nullable=False),
+        sa.Column('updated_at', sa.DateTime, nullable=False),
+        sa.Column('text_content', sa.Text, nullable=True),
+        sa.Column('text_file_path', sa.String(255), nullable=True),
+        sa.Column('audio_file_path', sa.String(255), nullable=True),
+        sa.Column('error_message', sa.Text, nullable=True),
+    )
+
+    # Create audiobook_chunks table
+    op.create_table(
+        'audiobook_chunks',
+        sa.Column('id', sa.Integer, primary_key=True),
+        sa.Column('audiobook_id', sa.String(36), sa.ForeignKey('audiobooks.id'), nullable=False),
+        sa.Column('chunk_number', sa.Integer, nullable=False),
+        sa.Column('text_content', sa.Text, nullable=False),
+        sa.Column('audio_file_path', sa.String(255), nullable=True),
+        sa.Column('status', status_type, nullable=False),
+        sa.Column('created_at', sa.DateTime, nullable=False),
+        sa.Column('updated_at', sa.DateTime, nullable=False),
+    )
+
+    # Create indexes
+    op.create_index('ix_audiobooks_created_at', 'audiobooks', ['created_at'])
+    op.create_index('ix_audiobooks_status', 'audiobooks', ['status'])
+    op.create_index('ix_audiobook_chunks_audiobook_id', 'audiobook_chunks', ['audiobook_id'])
+    op.create_index('ix_audiobook_chunks_chunk_number', 'audiobook_chunks', ['chunk_number'])
+
+def downgrade() -> None:
+    # Drop tables
+    op.drop_table('audiobook_chunks')
+    op.drop_table('audiobooks')
+
+    # Drop enum type if using PostgreSQL
+    if op.get_bind().dialect.name == 'postgresql':
+        op.execute('DROP TYPE audiobookstatus')
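Because revision = '001' and down_revision = None, this file is the root of the migration history; applying and rolling it back can be exercised directly (the commands below are standard Alembic usage, not part of the commit):

alembic upgrade 001       # create both tables, the indexes, and (on Postgres) the enum type
alembic downgrade base    # run downgrade(): drop the tables and the enum type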