# Sofia Casadei
# install flash attention
# ddd255d
# Stage 1: image that provides the uv binary (copied into the main image below).
# The tag is pinned so rebuilds resolve to the same uv release.
FROM ghcr.io/astral-sh/uv:0.2.12 AS uv
# Stage 2: Main application image (CUDA 12.1.1 + cuDNN 8 on Ubuntu 22.04).
# NOTE(review): the -devel variant is presumably needed so flash-attn can be
# compiled from source later in this file — confirm before switching to -runtime.
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04
# Build-time only: stops apt/debconf from prompting during the RUN steps of this
# stage. Declared as ARG rather than ENV so it is not baked into the environment
# of the running container.
ARG DEBIAN_FRONTEND=noninteractive
# Copy the uv binary from the first stage to /uv
COPY --from=uv /uv /uv
# System packages: Python 3.11 with its venv module, pip, ffmpeg, a C/C++
# toolchain and git. Update, upgrade, install and cleanup share a single layer
# so the apt package lists never persist in the image.
RUN apt-get update \
    && apt-get upgrade -y \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        git \
        python3-pip \
        python3.11 \
        python3.11-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Create the virtual environment with uv.
# The interpreter is named explicitly: python3.11 is installed above, but the
# image may also contain the distribution's default python3, and without
# --python uv picks whichever interpreter it discovers first.
RUN --mount=type=cache,target=/root/.cache/uv \
    /uv venv --python /usr/bin/python3.11 /opt/venv
# Activate the venv for every later build step and for the running container:
# VIRTUAL_ENV tells uv where to install, and the venv's bin directory goes first on PATH.
ENV VIRTUAL_ENV=/opt/venv \
    PATH="/opt/venv/bin:$PATH"
# Non-root account with UID 1000 (required for HF Spaces). It is given
# ownership of the venv so the package installs that run as this user can
# write into it.
RUN useradd --create-home --uid 1000 user \
    && chown --recursive user /opt/venv
# Every instruction from here on runs as that user, with /app as the working directory.
USER user
WORKDIR /app
# Per-user environment: home directory, the user's local bin directory at the
# front of PATH, the Hugging Face home/cache location, and uv's cache directory.
ENV HOME="/home/user" \
    PATH="/home/user/.local/bin:$PATH" \
    HF_HOME="/home/user/.cache/huggingface" \
    UV_CACHE_DIR="/app/.uv-cache"
# Create uv's cache directory. This step already runs as `user`, so the
# directory is owned by that account on creation and no chown is needed.
RUN mkdir -p "$UV_CACHE_DIR"
# Copy only the requirements file first so the dependency layer below stays
# cached until requirements.txt itself changes.
COPY --chown=user requirements.txt .
# Install Python packages into the venv.
# uv's cache lives in the BuildKit cache mount, which is never part of the
# image, so the cache is left enabled: --no-cache-dir (uv's alias for
# --no-cache) would make uv ignore the mount. --link-mode=copy is what the uv
# Docker guide recommends when the cache sits on a cache mount.
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install --link-mode=copy -r requirements.txt
# Install/upgrade the build dependencies used by the flash-attn build.
# The cache is left enabled because it lives in the BuildKit cache mount, not
# in the image; --no-cache-dir (uv's alias for --no-cache) would bypass the
# mount. --link-mode=copy is what the uv Docker guide recommends for a cache
# that sits on a cache mount.
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install -U --link-mode=copy setuptools wheel ninja packaging
# Install flash-attn. Build isolation is switched off so that a source build
# runs against the packages already installed in the venv by the earlier steps
# instead of a fresh, isolated build environment.
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install --no-build-isolation flash-attn
# Alternative kept for reference: install a prebuilt wheel instead.
#/uv pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
# Copy the rest of the build context into the working directory, owned by the
# non-root user.
COPY --chown=user . ./
# Document the port the server listens on (FastRTC; matches HF Spaces default).
EXPOSE 7860/tcp
# Launch the `app` object from module `main` with uvicorn (FastAPI), bound to
# all interfaces on the exposed port.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]