# syntax=docker/dockerfile:1

# =============================================================================
# Stage 1: model-downloader
# Fetches the sesame/csm-1b checkpoint using an (optional) Hugging Face token.
# Pass the token at build time:  docker build --build-arg HF_TOKEN=hf_xxx .
# NOTE(security): a build ARG is recorded in this stage's history; if the
# builder cache is shared, prefer BuildKit secrets (--mount=type=secret).
# =============================================================================
FROM python:3.10-slim AS model-downloader

# huggingface-cli ships with the huggingface_hub package.
RUN pip install --no-cache-dir huggingface_hub

WORKDIR /model-downloader

# Destination for the downloaded checkpoint (copied into the runtime stage).
RUN mkdir -p /model-downloader/models

# Optional Hugging Face access token; when absent, the download is skipped
# and /model-downloader/models stays empty.
ARG HF_TOKEN

# Pass the token directly to `download` instead of running `login`, so it is
# never written to disk inside this stage (and no ENV keeps it in metadata).
RUN if [ -n "$HF_TOKEN" ]; then \
        huggingface-cli download sesame/csm-1b ckpt.pt \
            --token "$HF_TOKEN" \
            --local-dir /model-downloader/models; \
    else \
        echo "No HF_TOKEN provided, model download will be skipped"; \
    fi

# =============================================================================
# Stage 2: runtime image (CUDA base + Python application)
# =============================================================================
FROM nvidia/cuda:12.4.0-base-ubuntu22.04

# Python/pip hygiene plus CUDA visibility and build flags.
ENV PYTHONFAULTHANDLER=1 \
    PYTHONUNBUFFERED=1 \
    PYTHONHASHSEED=random \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_DEFAULT_TIMEOUT=100 \
    NVIDIA_VISIBLE_DEVICES=all \
    NVIDIA_DRIVER_CAPABILITIES=compute,utility \
    TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6" \
    TORCH_NVCC_FLAGS="-Xfatbin -compress-all"

# Hugging Face cache inside the image; world-writable so the container can
# run as an arbitrary UID.
ENV HF_HOME=/app/.cache
RUN mkdir -p "$HF_HOME" && chmod -R 777 "$HF_HOME"

# System dependencies: ffmpeg for audio, git + build-essential for the
# from-source pip installs below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
        python3-dev \
        ffmpeg \
        git \
        build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy requirements first so dependency layers cache independently of app code.
COPY requirements.txt .

# Persistent/runtime directories, world-writable for arbitrary-UID containers.
RUN mkdir -p /app/static /app/models /app/voice_memories /app/voice_references \
        /app/voice_profiles /app/cloned_voices /app/audio_cache /app/tokenizers /app/logs && \
    chmod -R 777 /app/voice_references /app/voice_profiles /app/voice_memories \
        /app/cloned_voices /app/audio_cache /app/static /app/logs /app/tokenizers /app/models

COPY ./static /app/static

# Core ML dependencies.
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install torch torchaudio numpy

# torchao from source.
RUN pip3 install git+https://github.com/pytorch/ao.git

# torchtune from source: the main branch carries llama3_2 support.
# Installed non-editable (plain `install .`) so the /tmp clone can be removed —
# an editable install would keep pointing at /tmp/torchtune.
RUN git clone https://github.com/pytorch/torchtune.git /tmp/torchtune && \
    cd /tmp/torchtune && \
    git checkout main && \
    pip3 install . && \
    cd / && rm -rf /tmp/torchtune

# Application dependencies.
RUN pip3 install -r requirements.txt

# Extras for streaming and voice cloning.
RUN pip3 install yt-dlp openai-whisper

COPY ./app /app/app

# Bring in the checkpoint fetched in stage 1 (empty if no HF_TOKEN was given).
COPY --from=model-downloader /model-downloader/models /app/models

# Build-time sanity check: fails the build if torchtune is not importable.
RUN python3 -c "import torchtune.models; print('Available models in torchtune:', dir(torchtune.models))"

EXPOSE 8000

CMD ["python3", "-m", "app.main"]