Spaces:
Runtime error
Runtime error
Update Dockerfile
Browse files- Dockerfile +16 -11
Dockerfile
CHANGED
@@ -1,43 +1,48 @@
|
|
1 |
# syntax=docker/dockerfile:1
|
2 |
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
|
3 |
|
4 |
-
# Set environment variables
|
5 |
ENV DEBIAN_FRONTEND=noninteractive \
|
6 |
PIP_NO_CACHE_DIR=off \
|
7 |
PIP_DISABLE_PIP_VERSION_CHECK=on \
|
8 |
PYTHONIOENCODING=utf-8 \
|
9 |
-
PYTHONUNBUFFERED=1
|
|
|
|
|
10 |
|
11 |
-
#
|
|
|
|
|
|
|
12 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
13 |
git git-lfs python3 python3-pip curl ca-certificates build-essential && \
|
14 |
apt-get clean && rm -rf /var/lib/apt/lists/*
|
15 |
|
16 |
-
#
|
17 |
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
18 |
|
19 |
-
# Set working directory and install Python dependencies
|
20 |
WORKDIR /app
|
|
|
|
|
21 |
COPY requirements.txt .
|
22 |
RUN pip3 install --no-cache-dir --upgrade pip && \
|
23 |
pip3 install --no-cache-dir -r requirements.txt
|
24 |
|
25 |
-
#
|
26 |
-
# This command uses the secret HF_TOKEN, which is passed only during this RUN and not stored in the image.
|
27 |
RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
|
28 |
python3 -c "import os; \
|
29 |
from huggingface_hub import snapshot_download; \
|
30 |
-
token = os.
|
31 |
assert token, 'HF_TOKEN is not set!'; \
|
32 |
print('Token is set. Downloading model...'); \
|
33 |
snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"
|
34 |
|
35 |
-
# Expose port
|
36 |
EXPOSE 8000
|
37 |
|
38 |
-
# Healthcheck
|
39 |
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
|
40 |
CMD curl --fail http://localhost:8000/health || exit 1
|
41 |
|
42 |
-
#
|
43 |
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
|
|
|
# syntax=docker/dockerfile:1
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

# Runtime environment.
# NOTE: PIP_NO_CACHE_DIR=off was removed — since pip 19.0 the value "off" is
# parsed as falsy, so it silently RE-enabled the cache and contradicted the
# explicit --no-cache-dir flags used below. DEBIAN_FRONTEND is no longer baked
# into the runtime env (it is a build-time concern only; set inline on apt-get).
# TRANSFORMERS_CACHE points at a world-writable dir because HF Spaces runs the
# container as an arbitrary non-root UID (this was the original runtime error).
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1 \
    TRANSFORMERS_CACHE=/app/transformers_cache \
    NUMBA_DISABLE_CACHE=1

# System dependencies. update+install in one layer avoids the stale-apt-cache
# bug; list cleanup in the same layer keeps the image small. Packages sorted
# alphabetically for diffability.
RUN DEBIAN_FRONTEND=noninteractive apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Rust toolchain (needed to build some tokenizer wheels from source).
# FIX: rustup installs to /root/.cargo/bin, which is NOT on PATH by default —
# without the ENV line below, later pip builds could never find rustc/cargo,
# defeating the purpose of installing it.
# NOTE(review): curl | sh without a checksum is a supply-chain risk; consider
# pinning a rustup-init version and verifying its sha256.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
ENV PATH="/root/.cargo/bin:${PATH}"

WORKDIR /app

# Create the transformers cache and make it writable by any UID.
# NOTE(review): 777 is normally an anti-pattern, but Spaces assigns a random
# non-root UID at runtime, so the cache must be writable by "other" — confirm
# before tightening; a dedicated USER with --chown would be the cleaner fix.
RUN mkdir -p /app/transformers_cache && chmod -R 777 /app/transformers_cache

# Copy only the dependency manifest first so the install layer stays cached
# until requirements.txt itself changes (source edits won't bust it).
COPY requirements.txt .
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -r requirements.txt

# Download the model at build time via a BuildKit secret mount: HF_TOKEN is
# exposed only during this RUN and is never stored in any image layer,
# ENV, or `docker history`.
RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
    python3 -c "import os; \
from huggingface_hub import snapshot_download; \
token = os.environ.get('HF_TOKEN'); \
assert token, 'HF_TOKEN is not set!'; \
print('Token is set. Downloading model...'); \
snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"

# Documentation only (does not publish the port): API server listens on 8000.
EXPOSE 8000

# Liveness probe; generous start period because vLLM must load a 7B model
# before /health responds. curl is installed above, so the probe is available.
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl --fail http://localhost:8000/health || exit 1

# Exec-form CMD (proper PID 1 / signal handling): launch the vLLM
# OpenAI-compatible API server pointed at the baked-in model directory.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]