Update Dockerfile

This commit switches the base image to nvidia/cuda:12.1.0-runtime-ubuntu20.04, creates the Hugging Face and numba cache directories up front and makes them writable, puts the Rust toolchain on PATH, downloads the model snapshot as real files rather than symlinks, and lists /app/model afterwards to confirm the download succeeded.

Dockerfile  (+13 -5)  CHANGED
@@ -1,5 +1,6 @@
 # syntax=docker/dockerfile:1
-
+# Use a more recent base image if possible (check vLLM compatibility)
+FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04  # Or preferably a newer supported one
 
 # Set essential environment variables and disable numba caching
 ENV DEBIAN_FRONTEND=noninteractive \
@@ -12,8 +13,10 @@ ENV DEBIAN_FRONTEND=noninteractive \
     NUMBA_DISABLE_CACHE=1 \
    NUMBA_CACHE_DIR=/tmp/numba_cache
 
-# Create the
-
+# Create the necessary cache directories and ensure they are writable
+# Use the ENV variables for consistency
+RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME} && \
+    mkdir -p ${NUMBA_CACHE_DIR} && chmod -R 777 ${NUMBA_CACHE_DIR}
 
 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -21,23 +24,28 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 
 # Optionally, install Rust (needed for some tokenizers or building packages)
+# Consider adding PATH update for Rust binaries if needed later
+ENV PATH="/root/.cargo/bin:${PATH}"
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
 
 WORKDIR /app
 
 # Copy requirements file and install Python dependencies
 COPY requirements.txt .
+# Consider upgrading vllm here if the error persists: pip3 install --no-cache-dir --upgrade vllm
 RUN pip3 install --no-cache-dir --upgrade pip && \
     pip3 install --no-cache-dir -r requirements.txt
 
 # Use BuildKit secret mount to securely inject HF_TOKEN during build.
+# Ensure the downloaded model exists and is accessible
 RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
     python3 -c "import os; \
 from huggingface_hub import snapshot_download; \
 token = os.environ.get('HF_TOKEN'); \
 assert token, 'HF_TOKEN is not set!'; \
 print('Token is set. Downloading model...'); \
-snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"
+snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token, local_dir_use_symlinks=False)" \
+    && ls -l /app/model  # Add a check to see if download worked
 
 # Expose the port for the API server
 EXPOSE 8000
@@ -47,4 +55,4 @@ HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
     CMD curl --fail http://localhost:8000/health || exit 1
 
 # Launch the vLLM OpenAI-style API server, pointing to the downloaded model directory.
-CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
+CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
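The secret-mount step requires a BuildKit secret named HF_TOKEN to be present at build time; on Hugging Face Spaces, a Space secret with that name is exposed to the build automatically. For a local build, a minimal sketch with a reasonably recent Docker/BuildKit (the mistral-vllm tag is an example name, not from the commit):

    # Forward the HF_TOKEN environment variable into the build as a BuildKit
    # secret; it is mounted only for the RUN step that requests it and is not
    # written into any image layer.
    export HF_TOKEN=hf_xxxxxxxx   # placeholder: your Hugging Face access token
    docker build --secret id=HF_TOKEN,env=HF_TOKEN -t mistral-vllm .

One caveat on the download itself: recent huggingface_hub releases deprecate local_dir_use_symlinks (downloads into a local_dir already materialize real files), so depending on the version pinned in requirements.txt the new argument may be a harmless no-op.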
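Once the image builds, the server can be smoke-tested against the endpoints the Dockerfile wires up: /health is what the HEALTHCHECK polls, and the OpenAI-style completions route serves the model under the path passed to --model. A sketch, assuming a GPU host and the example mistral-vllm tag from above:

    # Run the container; vLLM needs GPU access to serve this model.
    docker run --gpus all -p 8000:8000 mistral-vllm

    # The endpoint the Dockerfile's HEALTHCHECK polls:
    curl --fail http://localhost:8000/health

    # OpenAI-compatible completion request; the model name is the --model path
    # unless --served-model-name overrides it.
    curl http://localhost:8000/v1/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "/app/model", "prompt": "Hello", "max_tokens": 32}'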