Spaces:
Runtime error
Runtime error
Update Dockerfile
Browse files — Dockerfile: +20 −79
Dockerfile
CHANGED
@@ -1,110 +1,51 @@
# syntax=docker/dockerfile:1
# Image for serving mistralai/Mistral-7B-Instruct-v0.1 with the vLLM
# OpenAI-compatible API server on a GPU. The model weights are downloaded
# at build time and baked into the image under /app/model.

# CUDA runtime base: vLLM needs the CUDA runtime libraries at inference time.
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

# Runtime environment:
# - PATH includes Cargo so Rust-built tooling (tokenizers) is reachable.
# - PYTHONUNBUFFERED / PYTHONIOENCODING make container logs immediate and UTF-8.
# NOTE: DEBIAN_FRONTEND is deliberately NOT set here — baking it into the
# runtime environment is an anti-pattern; it is scoped to the apt RUN below.
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PATH="/root/.cargo/bin:${PATH}" \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1

# System dependencies.
# - update + install combined in one layer (avoids the stale-apt-cache bug).
# - --no-install-recommends keeps the layer small.
# - apt lists removed in the SAME layer so the cleanup actually shrinks the image.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Rust toolchain: only needed if `tokenizers` must compile from source
# (no prebuilt wheel for this platform).
# NOTE(review): Rust + build-essential add significantly to image size;
# consider a multi-stage build if size is critical.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable

WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached
# until requirements.txt changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -r requirements.txt

# Hugging Face token for the model download. HF Spaces automatically injects
# matching repository secrets as build args.
# WARNING: build args are visible in `docker history`; on platforms that
# support it, prefer `RUN --mount=type=secret,id=HF_TOKEN` instead.
ARG HF_TOKEN

# Download the model at build time. The heredoc form requires the BuildKit
# frontend pinned by the `# syntax=` directive at the top of this file.
# HF_TOKEN is available as an environment variable inside this RUN.
RUN echo "Downloading model mistralai/Mistral-7B-Instruct-v0.1..." && \
    python3 <<EOF
import os
import sys
from huggingface_hub import snapshot_download

token = os.environ.get('HF_TOKEN')
if not token:
    # Public models can still be fetched anonymously; gated models will fail.
    print('Warning: HF_TOKEN not provided.', file=sys.stderr)
try:
    snapshot_download(
        repo_id='mistralai/Mistral-7B-Instruct-v0.1',
        local_dir='/app/model',
        token=token,
    )
except Exception as e:
    print(f'Download error: {e}', file=sys.stderr)
    # Exit non-zero so a failed download fails the Docker build.
    sys.exit(1)
EOF

# Documented API port (EXPOSE is metadata only; it does not publish the port).
EXPOSE 8000

# Health probe against vLLM's /health endpoint. The long --start-period
# allows time for model loading before probing begins; curl is installed above.
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl --fail http://localhost:8000/health || exit 1

# Exec-form CMD: the server runs as PID 1 and receives SIGTERM on `docker stop`.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]