Spaces: Runtime error
Update Dockerfile
Dockerfile +68 -21
CHANGED
@@ -1,40 +1,87 @@
 FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

-…
     git \
     git-lfs \
     python3 \
     python3-pip \
     curl \
     build-essential \
     && rm -rf /var/lib/apt/lists/*

-…
-ENV PATH="/root/.cargo/bin:${PATH}"

-…
-ARG HF_TOKEN
-ENV HF_TOKEN=${HF_TOKEN}

-…
-# Expose port
 EXPOSE 8000

-…
+# Use a specific CUDA version and OS combination
 FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

+# Set environment variables to ensure non-interactive installs
+ENV DEBIAN_FRONTEND=noninteractive \
+    PIP_NO_CACHE_DIR=off \
+    PIP_DISABLE_PIP_VERSION_CHECK=on \
+    # Set path for Rust/Cargo
+    PATH="/root/.cargo/bin:${PATH}" \
+    # Set default Python encoding (good practice)
+    PYTHONIOENCODING=utf-8 \
+    PYTHONUNBUFFERED=1

+# System packages:
+# - Combine update and install in one layer to reduce size.
+# - Use --no-install-recommends to avoid unnecessary packages.
+# - Install build tools, git, curl, python, pip, and ca-certificates (for HTTPS).
+# - Clean up apt cache afterwards.
+RUN apt-get update && apt-get install -y --no-install-recommends \
     git \
     git-lfs \
     python3 \
     python3-pip \
     curl \
+    ca-certificates \
     build-essential \
+    && apt-get clean \
     && rm -rf /var/lib/apt/lists/*

+# Install Rust using the recommended secure method
+# Needed for tokenizers compilation if wheels are not available
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
+# Note: Rust and build-essential add significantly to image size.
+# Consider a multi-stage build if size is critical (see the sketch after the diff).

+# Set up the application directory
+WORKDIR /app

+# Copy requirements file first to leverage Docker cache
+COPY requirements.txt .

+# Install Python dependencies from requirements file
+# Upgrading pip first is good practice.
+RUN pip3 install --no-cache-dir --upgrade pip
+RUN pip3 install --no-cache-dir -r requirements.txt

+# --- Model Download ---
+# Use ARG for build-time secret (HF Token)
+ARG HF_TOKEN
+# Check if HF_TOKEN was provided during build
+RUN if [ -z "$HF_TOKEN" ]; then echo "Error: HF_TOKEN build argument is required." && exit 1; fi

+
# Download the model using huggingface_hub Python library
|
54 |
+
# Storing the token in an intermediate ENV variable is okay here as it's needed by the script.
|
55 |
+
# The layer containing the ENV declaration itself won't persist the token in the final image history in the same way as hardcoding it.
|
56 |
+
# Ensure your CI/CD or build process handles the ARG securely.
|
57 |
+
RUN echo "Downloading model mistralai/Mistral-7B-Instruct-v0.1..." && \
|
58 |
+
HF_TOKEN=${HF_TOKEN} python3 -c "
|
59 |
+
from huggingface_hub import snapshot_download
|
60 |
+
import os
|
61 |
+
token = os.environ.get('HF_TOKEN')
|
62 |
+
if not token:
|
63 |
+
raise ValueError('HF_TOKEN environment variable not set inside the script')
|
64 |
+
snapshot_download(
|
65 |
+
repo_id='mistralai/Mistral-7B-Instruct-v0.1',
|
66 |
+
local_dir='/app/model',
|
67 |
+
token=token,
|
68 |
+
# Optional: Add ignore_patterns if you know you only need specific file types
|
69 |
+
# ignore_patterns=['*.safetensors', '*.h5', '*.msgpack']
|
70 |
+
)
|
71 |
+
print('Model download complete.')
|
72 |
+
"
|
+# Grant read permissions if needed (though likely not for model files)
+# RUN chmod -R +r /app/model

+# Expose the port vLLM will run on
 EXPOSE 8000

+# Healthcheck (Optional but recommended for Spaces)
+# Checks if the API server is responding on port 8000
+HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
+    CMD curl --fail http://localhost:8000/health || exit 1

+# Define the entrypoint command
+# Using python3 -m is standard practice
+CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
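The comments in the model-download step warn that the ARG has to be handled securely, since plain build arguments can be recovered from the image history. Below is a sketch of the BuildKit-secret alternative they allude to, assuming BuildKit is available; the secret id hf_token and the source file name are arbitrary choices, not part of this commit.

# First line of the Dockerfile, to opt in to BuildKit features:
# syntax=docker/dockerfile:1

# Replace the ARG/RUN pair with a secret mount; the token is readable only
# during this RUN and never enters an image layer or the build history.
RUN --mount=type=secret,id=hf_token \
    HF_TOKEN=$(cat /run/secrets/hf_token) python3 -c "..."  # same download one-liner as above

# Built with:
#   docker build --secret id=hf_token,src=./hf_token.txt -t mistral-vllm .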
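The size note next to the Rust install suggests a multi-stage build. A minimal sketch of that idea, assuming the pinned requirements install cleanly into a virtualenv that can be copied between stages; the stage name, venv path, and devel base tag are illustrative, not part of this commit:

# Builder stage: compilers and Rust available for building wheels
FROM nvidia/cuda:12.1.0-devel-ubuntu20.04 AS builder
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 python3-pip python3-venv build-essential curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
ENV PATH="/root/.cargo/bin:${PATH}"
COPY requirements.txt .
RUN python3 -m venv /opt/venv && /opt/venv/bin/pip install --no-cache-dir -r requirements.txt

# Runtime stage: no compilers, only the prebuilt virtualenv is carried over
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
RUN apt-get update && apt-get install -y --no-install-recommends \
    python3 curl ca-certificates \
    && rm -rf /var/lib/apt/lists/*
COPY --from=builder /opt/venv /opt/venv
ENV PATH="/opt/venv/bin:${PATH}"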
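For reference, a build-and-run sequence for this image might look like the sketch below; the token value, image tag, and prompt are placeholders, and the last request assumes the server has finished loading the model.

# Build, passing the Hugging Face token as a build argument (placeholder token)
docker build --build-arg HF_TOKEN=hf_xxxxxxxx -t mistral-vllm .

# Run with GPU access, mapping the exposed port to the host
docker run --gpus all -p 8000:8000 mistral-vllm

# Same endpoint the HEALTHCHECK polls
curl --fail http://localhost:8000/health

# Sample completion request; vLLM serves the model under its --model path
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "/app/model", "prompt": "Hello", "max_tokens": 16}'

One caveat: the 30s --start-period in the HEALTHCHECK is likely shorter than the time a 7B model takes to load, so the container may report unhealthy during startup; a longer start period would be safer.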