enskaff committed
Commit 9cb790e · verified · 1 Parent(s): 84c5bef

Update Dockerfile

Files changed (1):
  Dockerfile: +13 -5
Dockerfile CHANGED
```diff
@@ -1,5 +1,6 @@
 # syntax=docker/dockerfile:1
-FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
+# Use a more recent base image if possible (check vLLM compatibility)
+FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04 # Or preferably a newer supported one

 # Set essential environment variables and disable numba caching
 ENV DEBIAN_FRONTEND=noninteractive \
@@ -12,8 +13,10 @@ ENV DEBIAN_FRONTEND=noninteractive \
     NUMBA_DISABLE_CACHE=1 \
     NUMBA_CACHE_DIR=/tmp/numba_cache

-# Create the transformers cache directory and ensure it's writable
-RUN mkdir -p /app/transformers_cache && chmod -R 777 /app/transformers_cache
+# Create the necessary cache directories and ensure they are writable
+# Use the ENV variables for consistency
+RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME} && \
+    mkdir -p ${NUMBA_CACHE_DIR} && chmod -R 777 ${NUMBA_CACHE_DIR}

 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -21,23 +24,28 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     apt-get clean && rm -rf /var/lib/apt/lists/*

 # Optionally, install Rust (needed for some tokenizers or building packages)
+# Consider adding PATH update for Rust binaries if needed later
+ENV PATH="/root/.cargo/bin:${PATH}"
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable

 WORKDIR /app

 # Copy requirements file and install Python dependencies
 COPY requirements.txt .
+# Consider upgrading vllm here if the error persists: pip3 install --no-cache-dir --upgrade vllm
 RUN pip3 install --no-cache-dir --upgrade pip && \
     pip3 install --no-cache-dir -r requirements.txt

 # Use BuildKit secret mount to securely inject HF_TOKEN during build.
+# Ensure the downloaded model exists and is accessible
 RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
     python3 -c "import os; \
     from huggingface_hub import snapshot_download; \
     token = os.environ.get('HF_TOKEN'); \
     assert token, 'HF_TOKEN is not set!'; \
     print('Token is set. Downloading model...'); \
-    snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"
+    snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token, local_dir_use_symlinks=False)" \
+    && ls -l /app/model # Add a check to see if download worked

 # Expose the port for the API server
 EXPOSE 8000
@@ -47,4 +55,4 @@ HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
     CMD curl --fail http://localhost:8000/health || exit 1

 # Launch the vLLM OpenAI-style API server, pointing to the downloaded model directory.
-CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
+CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
```
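Because the model download runs at build time behind a BuildKit secret mount, the image must be built with the `HF_TOKEN` secret supplied. A minimal sketch of the build and run commands, not part of this commit: it assumes BuildKit is enabled (the default on recent Docker) and uses an arbitrary image tag `mistral-vllm`; the `env=` secret source requires a reasonably recent Buildx.

```bash
# Hypothetical placeholder; substitute your own Hugging Face access token.
export HF_TOKEN=hf_xxx

# Forward the token as a build secret; it is never written into an image layer.
docker build --secret id=HF_TOKEN,env=HF_TOKEN -t mistral-vllm .

# The model is baked into /app/model at build time, so no token is needed at runtime.
docker run --gpus all -p 8000:8000 mistral-vllm
```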
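Once the container reports healthy, the vLLM OpenAI-compatible server can be smoke-tested with curl. A sketch assuming the defaults from this Dockerfile; since no `--served-model-name` is passed, vLLM registers the model under the `--model` path itself, so `/app/model` doubles as the model name in requests.

```bash
# Same endpoint the HEALTHCHECK above polls.
curl --fail http://localhost:8000/health

# OpenAI-style completion request against the downloaded model.
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "/app/model", "prompt": "Hello!", "max_tokens": 32}'
```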