Spaces:
Runtime error
Runtime error
Update Dockerfile
Browse files- Dockerfile +16 -11
Dockerfile
CHANGED
@@ -1,43 +1,48 @@
|
|
1 |
# syntax=docker/dockerfile:1
|
2 |
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
|
3 |
|
4 |
-
# Set environment variables
|
5 |
ENV DEBIAN_FRONTEND=noninteractive \
|
6 |
PIP_NO_CACHE_DIR=off \
|
7 |
PIP_DISABLE_PIP_VERSION_CHECK=on \
|
8 |
PYTHONIOENCODING=utf-8 \
|
9 |
-
PYTHONUNBUFFERED=1
|
|
|
|
|
10 |
|
11 |
-
#
|
|
|
|
|
|
|
12 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
13 |
git git-lfs python3 python3-pip curl ca-certificates build-essential && \
|
14 |
apt-get clean && rm -rf /var/lib/apt/lists/*
|
15 |
|
16 |
-
#
|
17 |
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
18 |
|
19 |
-
# Set working directory and install Python dependencies
|
20 |
WORKDIR /app
|
|
|
|
|
21 |
COPY requirements.txt .
|
22 |
RUN pip3 install --no-cache-dir --upgrade pip && \
|
23 |
pip3 install --no-cache-dir -r requirements.txt
|
24 |
|
25 |
-
#
|
26 |
-
# This command uses the secret HF_TOKEN, which is passed only during this RUN and not stored in the image.
|
27 |
RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
|
28 |
python3 -c "import os; \
|
29 |
from huggingface_hub import snapshot_download; \
|
30 |
-
token = os.
|
31 |
assert token, 'HF_TOKEN is not set!'; \
|
32 |
print('Token is set. Downloading model...'); \
|
33 |
snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"
|
34 |
|
35 |
-
# Expose port
|
36 |
EXPOSE 8000
|
37 |
|
38 |
-
# Healthcheck
|
39 |
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
|
40 |
CMD curl --fail http://localhost:8000/health || exit 1
|
41 |
|
42 |
-
#
|
43 |
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
|
|
|
# syntax=docker/dockerfile:1
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

# Runtime environment.
# NOTE: PIP_NO_CACHE_DIR=off was removed — since pip 19.0 the value "off" is
# parsed as falsy, so it silently RE-enabled the cache and contradicted the
# explicit --no-cache-dir flags used below. DEBIAN_FRONTEND is no longer baked
# into the runtime env (it is a build-time concern only; set inline on apt-get).
# TRANSFORMERS_CACHE points at a world-writable dir because HF Spaces runs the
# container as an arbitrary non-root UID (this was the original runtime error).
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1 \
    TRANSFORMERS_CACHE=/app/transformers_cache \
    NUMBA_DISABLE_CACHE=1

# System dependencies. update+install in one layer avoids the stale-apt-cache
# bug; list cleanup in the same layer keeps the image small. Packages sorted
# alphabetically for diffability.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Rust toolchain (needed to build some tokenizer wheels from source).
# FIX: rustup installs to /root/.cargo/bin, which is NOT on PATH by default —
# without the ENV line below, later pip builds could never find rustc/cargo,
# defeating the purpose of installing it.
# NOTE(review): curl | sh without a checksum is a supply-chain risk; consider
# pinning a rustup-init version and verifying its sha256.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH="/root/.cargo/bin:${PATH}"

WORKDIR /app

# Create the transformers cache and make it writable by any UID.
# NOTE(review): 777 is normally an anti-pattern, but Spaces assigns a random
# non-root UID at runtime, so the cache must be writable by "other" — confirm
# before tightening; a dedicated USER with --chown would be the cleaner fix.
RUN mkdir -p /app/transformers_cache && chmod -R 777 /app/transformers_cache

# Copy only the dependency manifest first so the install layer stays cached
# until requirements.txt itself changes (source edits won't bust it).
COPY requirements.txt .
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -r requirements.txt

# Download the model at build time via a BuildKit secret mount: HF_TOKEN is
# exposed only during this RUN and is never stored in any image layer,
# ENV, or `docker history`.
RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
    python3 -c "import os; \
from huggingface_hub import snapshot_download; \
token = os.environ.get('HF_TOKEN'); \
assert token, 'HF_TOKEN is not set!'; \
print('Token is set. Downloading model...'); \
snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"

# Documentation only (does not publish the port): API server listens on 8000.
EXPOSE 8000

# Liveness probe; generous start period because vLLM must load a 7B model
# before /health responds. curl is installed above, so the probe is available.
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl --fail http://localhost:8000/health || exit 1

# Exec-form CMD (proper PID 1 / signal handling): launch the vLLM
# OpenAI-compatible API server pointed at the baked-in model directory.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]