mistral-vllm-chat / Dockerfile
# syntax=docker/dockerfile:1
# Use a more recent base image if possible (check vLLM compatibility)
FROM nvidia/cuda:12.1.0-runtime-ubuntu20.04
# Set essential environment variables; disable pip caching and point numba's cache at a writable location
ENV DEBIAN_FRONTEND=noninteractive \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=on \
    PYTHONIOENCODING=utf-8 \
    PYTHONUNBUFFERED=1 \
    HF_HOME=/app/huggingface_cache \
    TRANSFORMERS_CACHE=/app/huggingface_cache \
    NUMBA_DISABLE_CACHE=1 \
    NUMBA_CACHE_DIR=/tmp/numba_cache
# Create the necessary cache directories and ensure they are writable
# Use the ENV variables for consistency
RUN mkdir -p ${HF_HOME} && chmod -R 777 ${HF_HOME} && \
    mkdir -p ${NUMBA_CACHE_DIR} && chmod -R 777 ${NUMBA_CACHE_DIR}
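# (The chmod 777 above keeps the cache directories usable if the container later runs as a non-root user,
#  which is common on hosted container platforms.)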
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    git git-lfs python3 python3-pip curl ca-certificates build-essential && \
    apt-get clean && rm -rf /var/lib/apt/lists/*
# Install Rust (some tokenizer/Python packages need it to build from source) and put cargo binaries on PATH
ENV PATH="/root/.cargo/bin:${PATH}"
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
WORKDIR /app
# Copy the requirements file and install Python dependencies
COPY requirements.txt .
# If vLLM errors persist at runtime, consider upgrading it explicitly: pip3 install --no-cache-dir --upgrade vllm
RUN pip3 install --no-cache-dir --upgrade pip && \
    pip3 install --no-cache-dir -r requirements.txt
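# Note: requirements.txt is not shown in this file. The download step below assumes it installs at least
# vllm and huggingface_hub (snapshot_download is imported from huggingface_hub); a minimal example:
#   vllm
#   huggingface_hub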
# Use a BuildKit secret mount to inject HF_TOKEN at build time without baking it into an image layer,
# then download the model and verify that the snapshot landed in /app/model.
RUN --mount=type=secret,id=HF_TOKEN,env=HF_TOKEN,required=true \
    python3 -c "import os; \
from huggingface_hub import snapshot_download; \
token = os.environ.get('HF_TOKEN'); \
assert token, 'HF_TOKEN is not set!'; \
print('Token is set. Downloading model...'); \
snapshot_download(repo_id='mistralai/Mistral-7B-Instruct-v0.1', local_dir='/app/model', token=token, local_dir_use_symlinks=False)" && \
    ls -l /app/model
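# Example build invocation (reference only; the token file path and image tag are placeholders):
#   DOCKER_BUILDKIT=1 docker build --secret id=HF_TOKEN,src=./hf_token.txt -t mistral-vllm-chat .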
# Expose the port for the API server
EXPOSE 8000
# Healthcheck to verify API server is responding
HEALTHCHECK --interval=20s --timeout=10s --start-period=120s --retries=3 \
    CMD curl --fail http://localhost:8000/health || exit 1
# Launch the vLLM OpenAI-style API server, pointing to the downloaded model directory.
CMD ["python3", "-m", "vllm.entrypoints.openai.api_server", "--model", "/app/model"]