enskaff committed on
Commit
336ec47
·
verified ·
1 Parent(s): e6ed01f

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +68 -21
Dockerfile CHANGED
@@ -1,40 +1,87 @@
 
1
  FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
2
 
3
- # System packages (including curl and build tools)
4
- RUN apt-get update && apt-get install -y \
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  git \
6
  git-lfs \
7
  python3 \
8
  python3-pip \
9
  curl \
 
10
  build-essential \
 
11
  && rm -rf /var/lib/apt/lists/*
12
 
 
 
 
 
 
13
 
14
- # Install Rust (needed for tokenizers)
15
- RUN curl https://sh.rustup.rs -sSf | sh -s -- -y
16
- ENV PATH="/root/.cargo/bin:${PATH}"
17
 
18
- # Python packages
19
- RUN pip3 install vllm accelerate
20
 
21
- # Download Mistral model from Hugging Face
22
- RUN mkdir /app
23
- WORKDIR /app
24
- # Set HF token (replace with your own or use an ARG)
25
- ARG HF_TOKEN
26
- ENV HF_TOKEN=${HF_TOKEN}
27
 
28
- # Download model using Hugging Face Hub
29
- RUN pip install huggingface_hub
 
 
 
30
 
31
- RUN python3 -c "\
32
- from huggingface_hub import snapshot_download; \
33
- snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token='${HF_TOKEN}')"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
 
 
35
 
36
- # Expose port for API
37
  EXPOSE 8000
38
 
39
- # Run vLLM API server
40
- CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
 
 
 
 
 
 
 
# syntax=docker/dockerfile:1
# Syntax directive is required for heredoc RUN blocks and --mount=type=secret below.

# Pin a specific CUDA runtime + OS combination for reproducibility.
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

# Runtime-relevant environment only.
# - PATH: make the Rust toolchain visible (needed if tokenizers compile from source).
# - PIP_DISABLE_PIP_VERSION_CHECK: silence pip's self-update nag.
# - PYTHONUNBUFFERED/PYTHONIOENCODING: sane logging defaults for containers.
# NOTE: DEBIAN_FRONTEND is deliberately NOT set here — baking it into the runtime
# env is an anti-pattern; it is applied inline per apt-get invocation instead.
# NOTE: PIP_NO_CACHE_DIR=off was removed — pip treats ANY value as truthy, so
# "off" confusingly disabled the cache; --no-cache-dir is passed explicitly.
ENV PATH="/root/.cargo/bin:${PATH}" \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1

# System packages:
# - update + install combined in one layer (avoids the stale-apt-cache bug).
# - --no-install-recommends keeps the image lean.
# - ca-certificates is required for HTTPS downloads (rustup, Hugging Face).
# - cache cleaned in the same layer so it never persists in the image.
RUN DEBIAN_FRONTEND=noninteractive apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Rust toolchain — needed only if tokenizers must compile from source (no wheel).
# --proto/--tlsv1.2 harden the transport; the script itself is unpinned —
# NOTE(review): consider pinning a rustup version/checksum for full reproducibility.
# Rust + build-essential add significant size; a multi-stage build would shed them.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable

# Application directory (WORKDIR creates it; no mkdir needed).
WORKDIR /app

# Copy the dependency manifest alone first so this layer — and the install layer
# below — stay cached until requirements.txt itself changes.
COPY requirements.txt .

# Upgrade pip, then install pinned dependencies; --no-cache-dir avoids baking
# the wheel cache into the layer.
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir -r requirements.txt

# --- Model download ---
# The HF token is supplied as a BuildKit secret mount, NOT an ARG/ENV:
# secret mounts never appear in any layer or in `docker history`.
# Build with: docker build --secret id=hf_token,src=/path/to/token .
RUN --mount=type=secret,id=hf_token python3 - <<'EOF'
import os
import sys

from huggingface_hub import snapshot_download

TOKEN_PATH = "/run/secrets/hf_token"
if not os.path.exists(TOKEN_PATH):
    sys.exit('Error: build secret "hf_token" is required '
             "(docker build --secret id=hf_token,src=...).")
with open(TOKEN_PATH) as fh:
    token = fh.read().strip()

print("Downloading model mistralai/Mistral-7B-Instruct-v0.1...")
snapshot_download(
    repo_id="mistralai/Mistral-7B-Instruct-v0.1",
    local_dir="/app/model",
    token=token,
    # Optional: ignore_patterns=[...] to skip unneeded weight formats.
)
print("Model download complete.")
EOF

# Drop root: create an unprivileged user (uid 1000 matches HF Spaces convention)
# and give it ownership of /app so vLLM can write caches next to the model.
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser

# Documentation of the service port (EXPOSE does not publish by itself).
EXPOSE 8000

# Liveness probe against vLLM's /health endpoint; curl is installed above.
HEALTHCHECK --interval=15s --timeout=5s --start-period=30s --retries=3 \
    CMD curl --fail http://localhost:8000/health || exit 1

# Exec-form CMD so the server is PID 1 and receives SIGTERM from `docker stop`.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]