Spaces: Runtime error
Update Dockerfile
Dockerfile +68 -21
CHANGED
@@ -1,40 +1,87 @@
 FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

-…
     git \
     git-lfs \
     python3 \
     python3-pip \
     curl \
     build-essential \
     && rm -rf /var/lib/apt/lists/*

-…
-ENV PATH="/root/.cargo/bin:${PATH}"

-…
-ARG HF_TOKEN
-ENV HF_TOKEN=${HF_TOKEN}

-…
-# Expose port
 EXPOSE 8000

-…
+# Use a specific CUDA version and OS combination
 FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

+# Set environment variables to ensure non-interactive installs
+ENV DEBIAN_FRONTEND=noninteractive \
+    PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    # Set path for Rust/Cargo
+    PATH="/root/.cargo/bin:${PATH}" \
+    # Set default Python encoding (good practice)
+    PYTHONIOENCODING=utf-8 \
+    PYTHONUNBUFFERED=1

+# System packages:
+# - Combine update and install in one layer to reduce size.
+# - Use --no-install-recommends to avoid unnecessary packages.
+# - Install build tools, git, curl, python, pip, and ca-certificates (for HTTPS).
+# - Clean up apt cache afterwards.
+RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     git-lfs \
     python3 \
     python3-pip \
     curl \
+    ca-certificates \
     build-essential \
+    && apt-get clean \
     && rm -rf /var/lib/apt/lists/*

+# Install Rust using the recommended secure method
+# Needed for tokenizers compilation if wheels are not available
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
+# Note: Rust and build-essential add significantly to image size.
+# Consider a multi-stage build if size is critical (see the sketch after the diff).

+# Set up the application directory
+WORKDIR /app

+# Copy requirements file first to leverage Docker cache
+COPY requirements.txt .

+# Install Python dependencies from requirements file
+# Upgrading pip first is good practice.
+RUN pip3 install --no-cache-dir --upgrade pip
+RUN pip3 install --no-cache-dir -r requirements.txt

+# --- Model Download ---
+# Use ARG for build-time secret (HF Token)
+ARG HF_TOKEN
+# Check if HF_TOKEN was provided during build
+RUN if [ -z "$HF_TOKEN" ]; then echo "Error: HF_TOKEN build argument is required." && exit 1; fi

+
# Download the model using huggingface_hub Python library
|
54 |
+
# Storing the token in an intermediate ENV variable is okay here as it's needed by the script.
|
55 |
+
# The layer containing the ENV declaration itself won't persist the token in the final image history in the same way as hardcoding it.
|
56 |
+
# Ensure your CI/CD or build process handles the ARG securely.
|
57 |
+
RUN echo "Downloading model mistralai/Mistral-7B-Instruct-v0.1..." && \
|
58 |
+
HF_TOKEN=${HF_TOKEN} python3 -c "
|
59 |
+
from huggingface_hub import snapshot_download
|
60 |
+
import os
|
61 |
+
token = os.environ.get('HF_TOKEN')
|
62 |
+
if not token:
|
63 |
+
raise ValueError('HF_TOKEN environment variable not set inside the script')
|
64 |
+
snapshot_download(
|
65 |
+
repo_id='mistralai/Mistral-7B-Instruct-v0.1',
|
66 |
+
local_dir='/app/model',
|
67 |
+
token=token,
|
68 |
+
# Optional: Add ignore_patterns if you know you only need specific file types
|
69 |
+
# ignore_patterns=['*.safetensors', '*.h5', '*.msgpack']
|
70 |
+
)
|
71 |
+
print('Model download complete.')
|
72 |
+
"
|
+# Grant read permissions if needed (though likely not for model files)
+# RUN chmod -R +r /app/model

+# Expose the port vLLM will run on
 EXPOSE 8000

+# Healthcheck (Optional but recommended for Spaces)
+# Checks if the API server is responding on port 8000
+HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
+    CMD curl --fail http://localhost:8000/health || exit 1

+# Define the entrypoint command
+# Using python3 -m is standard practice
+CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
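The comments in the model-download step warn that the ARG has to be handled securely, since plain build arguments can be recovered from the image history. Below is a sketch of the BuildKit-secret alternative they allude to, assuming BuildKit is available; the secret id hf_token and the source file name are arbitrary choices, not part of this commit.

# First line of the Dockerfile, to opt in to BuildKit features:
# syntax=docker/dockerfile:1

# Replace the ARG/RUN pair with a secret mount; the token is readable only
# during this RUN and never enters an image layer or the build history.
RUN --mount=type=secret,id=hf_token \
    HF_TOKEN=$(cat /run/secrets/hf_token) python3 -c "..."  # same download one-liner as above

# Built with:
#   docker build --secret id=hf_token,src=./hf_token.txt -t mistral-vllm .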
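The size note next to the Rust install suggests a multi-stage build. A minimal sketch of that idea, assuming the pinned requirements install cleanly into a virtualenv that can be copied between stages; the stage name, venv path, and devel base tag are illustrative, not part of this commit:

# Builder stage: compilers and Rust available for building wheels
FROM nvidia/cuda:12.1.0-devel-ubuntu20.04 AS builder
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 python3-pip python3-venv build-essential curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
COPY requirements.txt .
RUN python3 -m venv /opt/venv && /opt/venv/bin/pip install --no-cache-dir -r requirements.txt

# Runtime stage: no compilers, only the prebuilt virtualenv is carried over
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:${PATH}"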
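For reference, a build-and-run sequence for this image might look like the sketch below; the token value, image tag, and prompt are placeholders, and the last request assumes the server has finished loading the model.

# Build, passing the Hugging Face token as a build argument (placeholder token)
docker build --build-arg HF_TOKEN=hf_xxxxxxxx -t mistral-vllm .

# Run with GPU access, mapping the exposed port to the host
docker run --gpus all -p 8000:8000 mistral-vllm

# Same endpoint the HEALTHCHECK polls
curl --fail http://localhost:8000/health

# Sample completion request; vLLM serves the model under its --model path
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "/app/model", "prompt": "Hello", "max_tokens": 16}'

One caveat: the 30s --start-period in the HEALTHCHECK is likely shorter than the time a 7B model takes to load, so the container may report unhealthy during startup; a longer start period would be safer.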