File size: 2,160 Bytes
91537f8
 
5ef2360
91537f8
157b1a9
5ef2360
abd4f63
 
91537f8
 
5ef2360
157b1a9
 
abd4f63
 
ddd255d
 
 
 
157b1a9
 
91537f8
 
 
9290d06
91537f8
 
 
5ef2360
91537f8
 
 
 
 
9290d06
91537f8
 
157b1a9
9290d06
91537f8
 
 
 
 
 
 
 
 
 
 
 
ddd255d
 
 
 
 
91537f8
abd4f63
3375ee2
abd4f63
 
3375ee2
91537f8
 
5ef2360
91537f8
 
5ef2360
91537f8
9290d06
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Stage 1: source of the uv executable (pinned to 0.2.12); only /uv is
# copied out of this stage, nothing else from it reaches the final image
FROM ghcr.io/astral-sh/uv:0.2.12 AS uv

# Stage 2: Main application image
# (CUDA 12.1.1 + cuDNN 8 "devel" variant on Ubuntu 22.04)
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04 AS app

# Build-time only: stops apt/dpkg from prompting during package installation.
# Declared as ARG rather than ENV so the setting is visible to RUN steps in
# this stage but is not baked into the environment of running containers.
ARG DEBIAN_FRONTEND=noninteractive

# Copy the uv executable from the first stage
COPY --from=uv /uv /uv

# Install Python 3.11, pip, venv support, ffmpeg, and the build toolchain.
# No blanket `apt-get upgrade`: it makes the image drift from the pinned base
# tag on every rebuild; pick up OS updates by bumping the base image instead.
# Index update, install, and cleanup share one layer so the apt lists never
# persist in the image. Packages are listed one per line, sorted, for diffing.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        ffmpeg \
        git \
        python3-pip \
        python3.11 \
        python3.11-venv \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Create the virtual environment with uv, explicitly on the Python 3.11
# interpreter installed via apt. Without --python, uv uses whichever
# interpreter it discovers first, which on Ubuntu 22.04 may be the
# distribution's default python3 (3.10, pulled in by python3-pip) rather than
# the intended 3.11.
RUN --mount=type=cache,target=/root/.cache/uv \
    /uv venv --python /usr/bin/python3.11 /opt/venv

# Activate the virtual environment for all later build steps and at runtime
ENV VIRTUAL_ENV=/opt/venv \
    PATH="/opt/venv/bin:$PATH"

# Add a non-root account with UID 1000 (required for HF Spaces) and give it
# ownership of the virtual environment so it can install packages into it
RUN useradd --create-home --uid 1000 user \
    && chown --recursive user /opt/venv

# From here on, run as the unprivileged user with /app as the working directory
USER user
WORKDIR /app

# Per-user environment: home directory, Hugging Face cache location, uv cache
# location, and the user-level bin directory prepended to PATH
ENV HOME=/home/user \
    HF_HOME=/home/user/.cache/huggingface \
    UV_CACHE_DIR=/app/.uv-cache \
    PATH=/home/user/.local/bin:${PATH}

# Ensure the uv cache directory exists and belongs to the runtime user
RUN mkdir -p "${UV_CACHE_DIR}" \
    && chown -R user:user "${UV_CACHE_DIR}"

# Copy requirements first so the dependency layer below stays cached until
# requirements.txt itself changes
COPY --chown=user requirements.txt .

# Install Python packages, reusing uv's cache across builds.
# The cache lives in a BuildKit cache mount (owned by UID/GID 1000), which is
# not stored in any image layer, so uv's cache is left enabled here; passing
# --no-cache-dir would make uv ignore the mounted cache entirely.
# --link-mode=copy: the cache mount is typically a different filesystem from
# /opt/venv, so copy files instead of attempting hardlinks.
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install --link-mode=copy -r requirements.txt

# Install build dependencies for flash-attn. They must already be present in
# the environment because the flash-attn install below disables build
# isolation. uv's cache stays enabled so the cache mount is actually used.
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install --link-mode=copy -U setuptools wheel ninja packaging

# Install flash-attn, built against the packages already in the environment.
# NOTE(review): flash-attn is unpinned, so rebuilds may pick up a different
# release — consider pinning a version compatible with the installed torch.
# Alternative kept for reference (install a specific release wheel by URL):
#   /uv pip install https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl
RUN --mount=type=cache,target=$UV_CACHE_DIR,uid=1000,gid=1000 \
    /uv pip install --link-mode=copy flash-attn --no-build-isolation

# Bring the application source into the working directory, owned by the
# non-root user
COPY --chown=user ./ ./

# Document the port the server listens on (matches HF Spaces default)
EXPOSE 7860/tcp

# Launch the FastAPI app (main:app) with uvicorn on all interfaces, port 7860
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]