# syntax=docker/dockerfile:1
# Base: CUDA 12.1 runtime on Ubuntu 20.04 (system Python is 3.8).
# NOTE(review): recent vLLM releases require Python >= 3.9 — confirm the vllm
# version pinned in requirements.txt supports this base, or move to a
# 22.04-based CUDA image.
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
# Build-time only: suppress apt prompts without baking the setting into the
# runtime environment (an ARG does not persist into the final image).
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
#  - PIP_NO_CACHE_DIR=off: NOTE(review) 'off' is a known footgun — pip >= 19
#    parses it as false (cache ENABLED), older pip treated any value as true.
#    The explicit --no-cache-dir flags used below are what actually keep
#    pip's cache out of the image; consider removing this var or setting =1.
#  - PYTHONUNBUFFERED / PYTHONIOENCODING: predictable, unbuffered UTF-8 logs.
#  - HF_HOME / TRANSFORMERS_CACHE: writable Hugging Face cache location
#    (TRANSFORMERS_CACHE is deprecated in favor of HF_HOME but kept for
#    compatibility with older transformers versions).
#  - NUMBA_*: disable numba's on-disk cache, which fails on read-only or
#    permission-restricted filesystems.
ENV PIP_NO_CACHE_DIR=off \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/app/huggingface_cache \
    TRANSFORMERS_CACHE=/app/huggingface_cache \
    NUMBA_DISABLE_CACHE=1 \
    NUMBA_CACHE_DIR=/tmp/numba_cache
# Pre-create the cache directories referenced by the ENV vars above.
# World-writable (777) so the container still works when started with an
# arbitrary non-root UID (e.g. `docker run --user`); tighten to a dedicated
# user + chown if the runtime UID is ever fixed.
RUN mkdir -p "${HF_HOME}" "${NUMBA_CACHE_DIR}" \
    && chmod -R 777 "${HF_HOME}" "${NUMBA_CACHE_DIR}"
# System dependencies. `update` and `install` share one layer (avoids the
# stale-apt-cache bug) and the package lists are removed in the same layer
# so they never bloat the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        git-lfs \
        python3 \
        python3-pip \
    && apt-get clean && rm -rf /var/lib/apt/lists/*
# Rust toolchain: needed to build some tokenizers from source.
# Put cargo's bin dir on PATH so later layers can invoke rustc/cargo.
ENV PATH="/root/.cargo/bin:${PATH}"
# Run the pipe under bash with pipefail so a failed download cannot be
# silently masked by the `sh` on the right-hand side (hadolint DL4006).
RUN bash -o pipefail -c \
    "curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable"
WORKDIR /app

# Copy only the requirements manifest and install before anything else that
# changes frequently, so this (slow) layer stays cached until
# requirements.txt itself changes. --no-cache-dir keeps pip's download cache
# out of the image. If the vLLM startup error persists, try:
#   pip3 install --no-cache-dir --upgrade vllm
COPY requirements.txt .
RUN pip3 install --no-cache-dir --upgrade pip \
    && pip3 install --no-cache-dir -r requirements.txt
# Download the model at build time via a BuildKit secret: the token is
# mounted only for the duration of this RUN and is never written to a layer
# or to image history (unlike ARG/ENV).
# Build with: docker build --secret id=HF_TOKEN,env=HF_TOKEN .
# The trailing `ls -l` fails the build if the download produced nothing.
RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,mode=0444,required=true \
    python3 - <<'PY' && ls -l /app/model
import os
from huggingface_hub import snapshot_download

token = os.environ.get('HF_TOKEN')
assert token, 'HF_TOKEN is not set!'
print('Token is set. Downloading model...')
snapshot_download(
    repo_id='mistralai/Mistral-7B-Instruct-v0.1',
    local_dir='/app/model',
    token=token,
    # Materialize real files (not symlinks into the HF cache) so /app/model
    # is self-contained. NOTE(review): this kwarg is deprecated in newer
    # huggingface_hub versions — harmless, but can be dropped there.
    local_dir_use_symlinks=False,
)
PY
# Document the API port (EXPOSE does not publish it; map with -p at run time).
EXPOSE 8000

# Mark the container unhealthy when vLLM's health endpoint stops answering.
# The long start-period covers model load into GPU memory.
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:8000/health || exit 1

# Exec-form CMD: the server runs as PID 1 and receives SIGTERM on `docker stop`.
# NOTE(review): the image currently runs as root — consider creating a
# non-root USER once cache/model ownership is arranged in the same layers
# that create those files (a later `chown -R` would duplicate the model size).
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]