enskaff committed
Commit 9cb790e · verified · 1 Parent(s): 84c5bef

Update Dockerfile

Files changed (1):
  Dockerfile: +13 -5
Dockerfile CHANGED
```diff
@@ -1,5 +1,6 @@
 # syntax=docker/dockerfile:1
-FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
+# Use a more recent base image if possible (check vLLM compatibility)
+FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04 # Or preferably a newer supported one

 # Set essential environment variables and disable numba caching
 ENV DEBIAN_FRONTEND=noninteractive \
@@ -12,8 +13,10 @@ ENV DEBIAN_FRONTEND=noninteractive \
     NUMBA_DISABLE_CACHE=1 \
     NUMBA_CACHE_DIR=/tmp/numba_cache

-# Create the transformers cache directory and ensure it's writable
-RUN mkdir -p /app/transformers_cache && chmod -R 777 /app/transformers_cache
+# Create the necessary cache directories and ensure they are writable
+# Use the ENV variables for consistency
+RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME} && \
+    mkdir -p ${NUMBA_CACHE_DIR} && chmod -R 777 ${NUMBA_CACHE_DIR}

 # Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
@@ -21,23 +24,28 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     apt-get clean && rm -rf /var/lib/apt/lists/*

 # Optionally, install Rust (needed for some tokenizers or building packages)
+# Consider adding PATH update for Rust binaries if needed later
+ENV PATH="/root/.cargo/bin:${PATH}"
 RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable

 WORKDIR /app

 # Copy requirements file and install Python dependencies
 COPY requirements.txt .
+# Consider upgrading vllm here if the error persists: pip3 install --no-cache-dir --upgrade vllm
 RUN pip3 install --no-cache-dir --upgrade pip && \
     pip3 install --no-cache-dir -r requirements.txt

 # Use BuildKit secret mount to securely inject HF_TOKEN during build.
+# Ensure the downloaded model exists and is accessible
 RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
     python3 -c "import os; \
     from huggingface_hub import snapshot_download; \
     token = os.environ.get('HF_TOKEN'); \
     assert token, 'HF_TOKEN is not set!'; \
     print('Token is set. Downloading model...'); \
-    snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"
+    snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token, local_dir_use_symlinks=False)" \
+    && ls -l /app/model # Add a check to see if download worked

 # Expose the port for the API server
 EXPOSE 8000
@@ -47,4 +55,4 @@ HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
     CMD curl --fail http://localhost:8000/health || exit 1

 # Launch the vLLM OpenAI-style API server, pointing to the downloaded model directory.
-CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
+CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
```
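Because the model download runs at build time behind a BuildKit secret mount, the image must be built with the `HF_TOKEN` secret supplied. A minimal sketch of the build and run commands, not part of this commit: it assumes BuildKit is enabled (the default on recent Docker) and uses an arbitrary image tag `mistral-vllm`; the `env=` secret source requires a reasonably recent Buildx.

```bash
# Hypothetical placeholder; substitute your own Hugging Face access token.
export HF_TOKEN=hf_xxx

# Forward the token as a build secret; it is never written into an image layer.
docker build --secret id=HF_TOKEN,env=HF_TOKEN -t mistral-vllm .

# The model is baked into /app/model at build time, so no token is needed at runtime.
docker run --gpus all -p 8000:8000 mistral-vllm
```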
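Once the container reports healthy, the vLLM OpenAI-compatible server can be smoke-tested with curl. A sketch assuming the defaults from this Dockerfile; since no `--served-model-name` is passed, vLLM registers the model under the `--model` path itself, so `/app/model` doubles as the model name in requests.

```bash
# Same endpoint the HEALTHCHECK above polls.
curl --fail http://localhost:8000/health

# OpenAI-style completion request against the downloaded model.
curl http://localhost:8000/v1/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "/app/model", "prompt": "Hello!", "max_tokens": 32}'
```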