Spaces:
Runtime error
Runtime error
Update Dockerfile
Browse files — Dockerfile: +20 −79
Dockerfile
CHANGED
@@ -1,110 +1,51 @@
# syntax=docker/dockerfile:1
# Image for serving mistralai/Mistral-7B-Instruct-v0.1 with the vLLM
# OpenAI-compatible API server on a GPU. The model weights are downloaded
# at build time and baked into the image under /app/model.

# CUDA runtime base: vLLM needs the CUDA runtime libraries at inference time.
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

# Runtime environment:
# - PATH includes Cargo so Rust-built tooling (tokenizers) is reachable.
# - PYTHONUNBUFFERED / PYTHONIOENCODING make container logs immediate and UTF-8.
# NOTE: DEBIAN_FRONTEND is deliberately NOT set here — baking it into the
# runtime environment is an anti-pattern; it is scoped to the apt RUN below.
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PATH="/root/.cargo/bin:${PATH}" \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1

# System dependencies.
# - update + install combined in one layer (avoids the stale-apt-cache bug).
# - --no-install-recommends keeps the layer small.
# - apt lists removed in the SAME layer so the cleanup actually shrinks the image.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Rust toolchain: only needed if `tokenizers` must compile from source
# (no prebuilt wheel for this platform).
# NOTE(review): Rust + build-essential add significantly to image size;
# consider a multi-stage build if size is critical.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable

WORKDIR /app

# Copy only the dependency manifest first so the pip layer stays cached
# until requirements.txt changes.
COPY requirements.txt .
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -r requirements.txt

# Hugging Face token for the model download. HF Spaces automatically injects
# matching repository secrets as build args.
# WARNING: build args are visible in `docker history`; on platforms that
# support it, prefer `RUN --mount=type=secret,id=HF_TOKEN` instead.
ARG HF_TOKEN

# Download the model at build time. The heredoc form requires the BuildKit
# frontend pinned by the `# syntax=` directive at the top of this file.
# HF_TOKEN is available as an environment variable inside this RUN.
RUN echo "Downloading model mistralai/Mistral-7B-Instruct-v0.1..." && \
    python3 <<EOF
import os
import sys
from huggingface_hub import snapshot_download

token = os.environ.get('HF_TOKEN')
if not token:
    # Public models can still be fetched anonymously; gated models will fail.
    print('Warning: HF_TOKEN not provided.', file=sys.stderr)
try:
    snapshot_download(
        repo_id='mistralai/Mistral-7B-Instruct-v0.1',
        local_dir='/app/model',
        token=token,
    )
except Exception as e:
    print(f'Download error: {e}', file=sys.stderr)
    # Exit non-zero so a failed download fails the Docker build.
    sys.exit(1)
EOF

# Documented API port (EXPOSE is metadata only; it does not publish the port).
EXPOSE 8000

# Health probe against vLLM's /health endpoint. The long --start-period
# allows time for model loading before probing begins; curl is installed above.
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl --fail http://localhost:8000/health || exit 1

# Exec-form CMD: the server runs as PID 1 and receives SIGTERM on `docker stop`.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]