enskaff committed on
Commit
6f2c834
·
verified ·
1 Parent(s): 2d5cc5e

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -11
Dockerfile CHANGED
@@ -1,43 +1,48 @@
1
  # syntax=docker/dockerfile:1
2
  FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
3
 
4
- # Set environment variables
5
  ENV DEBIAN_FRONTEND=noninteractive \
6
  PIP_NO_CACHE_DIR=off \
7
  PIP_DISABLE_PIP_VERSION_CHECK=on \
8
  PYTHONIOENCODING=utf-8 \
9
- PYTHONUNBUFFERED=1
 
 
10
 
11
- # Install required system packages
 
 
 
12
  RUN apt-get update && apt-get install -y --no-install-recommends \
13
  git git-lfs python3 python3-pip curl ca-certificates build-essential && \
14
  apt-get clean && rm -rf /var/lib/apt/lists/*
15
 
16
- # Install Rust (if needed for building dependencies)
17
  RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
18
 
19
- # Set working directory and install Python dependencies
20
  WORKDIR /app
 
 
21
  COPY requirements.txt .
22
  RUN pip3 install --no-cache-dir --upgrade pip && \
23
  pip3 install --no-cache-dir -r requirements.txt
24
 
25
- # Download the model using BuildKit secret mount.
26
- # This command uses the secret HF_TOKEN, which is passed only during this RUN and not stored in the image.
27
  RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
28
  python3 -c "import os; \
29
  from huggingface_hub import snapshot_download; \
30
- token = os.getenv('HF_TOKEN'); \
31
  assert token, 'HF_TOKEN is not set!'; \
32
  print('Token is set. Downloading model...'); \
33
  snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"
34
 
35
- # Expose port 8000
36
  EXPOSE 8000
37
 
38
- # Healthcheck (optional)
39
  HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
40
  CMD curl --fail http://localhost:8000/health || exit 1
41
 
42
- # Set the entrypoint to run the vLLM API server
43
  CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
 
1
  # syntax=docker/dockerfile:1
2
  FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
3
 
4
+ # Set essential environment variables and disable numba caching
5
  ENV DEBIAN_FRONTEND=noninteractive \
6
  PIP_NO_CACHE_DIR=off \
7
  PIP_DISABLE_PIP_VERSION_CHECK=on \
8
  PYTHONIOENCODING=utf-8 \
9
+ PYTHONUNBUFFERED=1 \
10
+ TRANSFORMERS_CACHE=/app/transformers_cache \
11
+ NUMBA_DISABLE_CACHE=1
12
 
13
+ # Create the transformers cache directory and ensure it's writable
14
+ RUN mkdir -p /app/transformers_cache && chmod -R 777 /app/transformers_cache
15
+
16
+ # Install system dependencies
17
  RUN apt-get update && apt-get install -y --no-install-recommends \
18
  git git-lfs python3 python3-pip curl ca-certificates build-essential && \
19
  apt-get clean && rm -rf /var/lib/apt/lists/*
20
 
21
+ # Optionally, install Rust (needed for some tokenizers or building packages)
22
  RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
23
 
 
24
  WORKDIR /app
25
+
26
+ # Copy requirements file and install Python dependencies
27
  COPY requirements.txt .
28
  RUN pip3 install --no-cache-dir --upgrade pip && \
29
  pip3 install --no-cache-dir -r requirements.txt
30
 
31
+ # Use BuildKit secret mount to securely inject HF_TOKEN during build.
 
32
  RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
33
  python3 -c "import os; \
34
  from huggingface_hub import snapshot_download; \
35
+ token = os.environ.get('HF_TOKEN'); \
36
  assert token, 'HF_TOKEN is not set!'; \
37
  print('Token is set. Downloading model...'); \
38
  snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token)"
39
 
40
+ # Expose the port for the API server
41
  EXPOSE 8000
42
 
43
+ # Healthcheck to verify API server is responding
44
  HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
45
  CMD curl --fail http://localhost:8000/health || exit 1
46
 
47
+ # Launch the vLLM OpenAI-style API server, pointing to the downloaded model directory.
48
  CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]