enskaff committed on
Commit
3d5ae27
·
verified ·
1 Parent(s): 8053da8

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +20 -79
Dockerfile CHANGED
@@ -1,110 +1,51 @@
1
- # Use a specific CUDA version and OS combination
2
  FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
3
 
4
- # Set environment variables to ensure non-interactive installs
5
  ENV DEBIAN_FRONTEND=noninteractive \
6
  PIP_NO_CACHE_DIR=off \
7
  PIP_DISABLE_PIP_VERSION_CHECK=on \
8
- # Set path for Rust/Cargo
9
  PATH="/root/.cargo/bin:${PATH}" \
10
- # Set default Python encoding (good practice)
11
  PYTHONIOENCODING=utf-8 \
12
  PYTHONUNBUFFERED=1
13
 
14
- # System packages:
15
- # - Combine update and install in one layer to reduce size.
16
- # - Use --no-install-recommends to avoid unnecessary packages.
17
- # - Install build tools, git, git-lfs, curl, python, pip, and ca-certificates (for HTTPS).
18
- # - Clean up apt cache afterwards.
19
  RUN apt-get update && apt-get install -y --no-install-recommends \
20
- git \
21
- git-lfs \
22
- python3 \
23
- python3-pip \
24
- curl \
25
- ca-certificates \
26
- build-essential \
27
- && apt-get clean \
28
- && rm -rf /var/lib/apt/lists/*
29
 
30
- # Install Rust using the recommended secure method
31
- # Needed for tokenizers compilation if wheels are not available
32
  RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
33
- # Note: Rust and build-essential add significantly to image size.
34
- # Consider a multi-stage build if size is critical.
35
 
36
- # Set up the application directory
37
  WORKDIR /app
38
-
39
- # Copy requirements file first to leverage Docker cache
40
- # Ensure you have a requirements.txt file in your repository!
41
  COPY requirements.txt .
 
 
42
 
43
- # Install Python dependencies from requirements file
44
- # Upgrading pip first is good practice.
45
- RUN pip3 install --no-cache-dir --upgrade pip
46
- RUN pip3 install --no-cache-dir -r requirements.txt
47
-
48
- # --- Model Download ---
49
- # Use ARG for build-time secret (HF Token). HF Spaces automatically injects matching secrets.
50
  ARG HF_TOKEN
51
 
52
- # Check if HF_TOKEN was provided during build (useful for local builds, HF Spaces provides it from Secrets)
53
- # RUN if [ -z "$HF_TOKEN" ]; then echo "Error: Build argument HF_TOKEN is required but was not provided." && exit 1; fi
54
- # Optional: Comment out the check above if it causes issues on HF platform and you trust the secret injection
55
-
56
- # Download the model using huggingface_hub Python library via a here-document
57
- # This passes the script to python3's stdin, avoiding potential parsing issues with -c "..."
58
  RUN echo "Downloading model mistralai/Mistral-7B-Instruct-v0.1..." && \
59
- # Use python3 reading from stdin (here-document)
60
  python3 <<EOF
61
- # --- Start of Python Script ---
62
- import os
63
- import sys
64
  from huggingface_hub import snapshot_download
65
-
66
- print('Python environment ready for download script.')
67
-
68
- # HF_TOKEN build arg is automatically available as an environment variable
69
- # within the context of this RUN command by the Docker builder / HF platform.
70
  token = os.environ.get('HF_TOKEN')
71
-
72
  if not token:
73
- print('Warning: HF_TOKEN environment variable not found. Ensure it is set as a Secret if needed.', file=sys.stderr)
74
- # Depending on model visibility, download might proceed or fail.
75
- # For public models like Mistral, it might work without a token.
76
-
77
- print(f'Attempting download using token: {"Token Provided" if token else "No Token Provided"}')
78
-
79
  try:
80
- snapshot_download(
81
- repo_id='mistralai/Mistral-7B-Instruct-v0.1',
82
- local_dir='/app/model',
83
- token=token, # Pass the token (can be None)
84
- # Optional: Add ignore_patterns if you know you only need specific file types
85
- # ignore_patterns=['*.safetensors', '*.h5', '*.msgpack']
86
- # Add user_agent for better tracking/debugging on HF side
87
- user_agent={'dockerfile': 'huggingface-space-vllm-heredoc'}
88
- )
89
- print('Model download complete.')
90
  except Exception as e:
91
- print(f'Error during model download: {e}', file=sys.stderr)
92
- # Exit with error code to fail the Docker build if download fails
93
  sys.exit(1)
94
-
95
- # --- End of Python Script ---
96
  EOF
97
- # Note: The 'EOF' marker above MUST be at the start of the line with no preceding spaces.
98
 
99
- # Expose the port vLLM will run on
100
  EXPOSE 8000
101
-
102
- # Healthcheck (Optional but recommended for Spaces)
103
- # Checks if the API server is responding on port 8000
104
- # Wait longer initially (start-period) for model loading
105
  HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
106
- CMD curl --fail http://localhost:8000/health || exit 1
107
 
108
- # Define the entrypoint command
109
- # Using python3 -m is standard practice
110
- CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]
 
# syntax=docker/dockerfile:1
# (BuildKit frontend is required for the RUN heredoc and secret mount below.)

# Use the NVIDIA CUDA runtime image for GPU support
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04

# Build-time only: keep DEBIAN_FRONTEND out of the runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
# NOTE: PIP_NO_CACHE_DIR=off was dropped — pip parses "off" as *false*
# (cache enabled), so it never did what it appeared to; the explicit
# --no-cache-dir flags below disable the cache where it matters.
ENV PIP_DISABLE_PIP_VERSION_CHECK=on \
    PATH="/root/.cargo/bin:${PATH}" \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1

# Install system dependencies in a single layer (update + install together,
# clean the apt lists in the same layer so they never persist in the image).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

# Install Rust (required for compiling tokenizers when no prebuilt wheel exists)
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable

# Set the working directory, then install Python dependencies.
# requirements.txt is copied alone first so the dependency layer stays cached
# when only application code changes.
WORKDIR /app
COPY requirements.txt .
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -r requirements.txt

# Backward-compatible fallback only: --build-arg HF_TOKEN still works, but
# prefer the secret mount below — build args are visible in `docker history`.
ARG HF_TOKEN

# Download the model during build.
# SECURITY: the token is read from a BuildKit secret mount so it is never
# stored in an image layer. HF Spaces exposes repository secrets this way;
# locally: docker build --secret id=HF_TOKEN,env=HF_TOKEN .
RUN --mount=type=secret,id=HF_TOKEN \
    echo "Downloading model mistralai/Mistral-7B-Instruct-v0.1..." && \
    python3 <<EOF
import os, sys
from huggingface_hub import snapshot_download

# Prefer the secret mount; fall back to the (discouraged) build-arg env var.
token = None
secret_path = '/run/secrets/HF_TOKEN'
if os.path.exists(secret_path):
    with open(secret_path) as f:
        token = f.read().strip() or None
if not token:
    token = os.environ.get('HF_TOKEN') or None
if not token:
    # Public models may still download anonymously, so warn instead of failing.
    print('Warning: HF_TOKEN not provided.', file=sys.stderr)
try:
    snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1',
                      local_dir='/app/model', token=token)
except Exception as e:
    print(f'Download error: {e}', file=sys.stderr)
    # Fail the Docker build if the model cannot be fetched.
    sys.exit(1)
EOF

# Expose the API port (documentation only; publishing happens at `docker run`).
EXPOSE 8000

# Long start-period gives the server time to load the 7B model weights
# before health probes start counting failures.
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl --fail http://localhost:8000/health || exit 1

# Start the vLLM OpenAI-compatible API server (exec form: PID 1, proper signals).
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]