Spaces:
Runtime error
Runtime error
File size: 2,559 Bytes
8ced1ba 9cb790e aa75ccb 1b85b71 6f2c834 1b85b71 6f2c834 5a60ff4 84c5bef 799283a 9cb790e 6f2c834 336ec47 3d5ae27 c392d1c 6f2c834 9cb790e 336ec47 b56af87 336ec47 6f2c834 336ec47 9cb790e 3d5ae27 25bcaf8 6f2c834 9cb790e 2d5cc5e 8ced1ba 6f2c834 8ced1ba 2d5cc5e 9cb790e 8ced1ba 6f2c834 8ced1ba 6f2c834 8ced1ba 6f2c834 9cb790e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# syntax=docker/dockerfile:1
# Use a more recent base image if possible (check vLLM compatibility)
# NOTE(review): ubuntu20.04 is near end of standard support; consider a
# 22.04-based CUDA runtime image and pinning by digest (@sha256:...) for
# reproducibility — verify vLLM/CUDA compatibility before bumping.
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
# Set essential environment variables and disable numba caching.
# FIX: pip parses "off" as boolean FALSE for PIP_NO_CACHE_DIR, so the previous
# value left the pip cache ENABLED — "1" actually disables the on-disk cache.
# NOTE(review): DEBIAN_FRONTEND is kept here because later apt RUNs rely on it,
# but it also persists into the runtime env; TRANSFORMERS_CACHE is deprecated
# in newer transformers releases and retained alongside HF_HOME for
# compatibility — confirm before removing either.
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/app/huggingface_cache \
    TRANSFORMERS_CACHE=/app/huggingface_cache \
    NUMBA_DISABLE_CACHE=1 \
    NUMBA_CACHE_DIR=/tmp/numba_cache
# Create the cache directories named in the ENV variables and make them
# world-writable in a single layer.
# NOTE(review): chmod -R 777 is presumably intentional so an arbitrary
# non-root runtime UID (e.g. Hugging Face Spaces) can write to these paths —
# confirm before tightening permissions.
RUN mkdir -p "${HF_HOME}" "${NUMBA_CACHE_DIR}" && \
    chmod -R 777 "${HF_HOME}" "${NUMBA_CACHE_DIR}"
# Install system dependencies (one package per line, alphabetized for easy
# diffing); update + install share a layer so the apt index is never stale,
# and list cleanup happens in the same layer to keep the image small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Optionally, install Rust (needed for some tokenizers or building packages).
# PATH is extended up front so cargo binaries are visible to all later layers.
ENV PATH="/root/.cargo/bin:${PATH}"
# FIX: avoid `curl | sh` — the default /bin/sh has no pipefail, so a failed
# download would be silently masked by the pipeline. Download the installer to
# a file (curl -f fails the layer on HTTP errors), run it, then remove it in
# the same layer so it never persists in the image.
RUN curl --proto '=https' --tlsv1.2 -fsSL -o /tmp/rustup-init.sh https://sh.rustup.rs && \
    sh /tmp/rustup-init.sh -y --default-toolchain stable && \
    rm -f /tmp/rustup-init.sh
WORKDIR /app

# Copy only the requirements manifest before installing, so this dependency
# layer stays cached until requirements.txt itself changes.
COPY requirements.txt .
# Consider upgrading vllm here if the error persists:
#   python3 -m pip install --no-cache-dir --upgrade vllm
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir -r requirements.txt
# Use a BuildKit secret mount to securely inject HF_TOKEN during build: the
# token is exposed only for this RUN and never lands in a layer or in
# `docker history`. Build with: docker build --secret id=HF_TOKEN,env=HF_TOKEN .
# A heredoc (enabled by the `# syntax=docker/dockerfile:1` directive) replaces
# the fragile backslash-continued `python3 -c` one-liner.
RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true <<'EOF'
set -e
python3 - <<'PYEOF'
import os

from huggingface_hub import snapshot_download

token = os.environ.get('HF_TOKEN')
assert token, 'HF_TOKEN is not set!'
print('Token is set. Downloading model...')
# local_dir_use_symlinks=False materializes real files under /app/model so the
# runtime does not depend on the HF cache layout.
# NOTE(review): this flag is deprecated in newer huggingface_hub releases —
# confirm the installed version still accepts it before upgrading.
snapshot_download(
    repo_id='mistralai/Mistral-7B-Instruct-v0.1',
    local_dir='/app/model',
    token=token,
    local_dir_use_symlinks=False,
)
PYEOF
# Fail the build early if the snapshot did not materialize.
ls -l /app/model
EOF
# Document the API server port (EXPOSE does not publish; use -p at run time).
EXPOSE 8000

# Periodic liveness probe against the server's health endpoint; the generous
# start period allows time for the model to load before probes count.
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1
# Launch the vLLM OpenAI-style API server, pointing to the downloaded model
# directory. Exec (JSON-array) form keeps the server as PID 1 so it receives
# SIGTERM directly on `docker stop`.
# FIX: removed a stray trailing ` |` after the array — non-JSON trailing text
# makes Docker fall back to shell form, wrapping the command in `/bin/sh -c`.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]