llm_host / Dockerfile
Bahodir Nematjonov
updated model
7c59172
raw
history blame
457 Bytes
# Runtime-only PyTorch image with CUDA 11.7 / cuDNN 8 (no compiler toolchain).
FROM pytorch/pytorch:2.0.0-cuda11.7-cudnn8-runtime

# All application files live under /code; WORKDIR creates it if missing.
WORKDIR /code

# Install Python dependencies first so this layer stays cached until
# requirements.txt itself changes. --no-cache-dir keeps pip's download
# cache out of the image layer.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# TRANSFORMERS_NO_ADVISORY_WARNINGS silences advisory log messages from the
# transformers library; HF_HOME relocates the Hugging Face cache (downloaded
# models, tokens) into the application directory.
ENV TRANSFORMERS_NO_ADVISORY_WARNINGS=1 \
    HF_HOME="/code/hf_cache"

# Create an unprivileged user and hand it /code, including the cache
# directory, so model downloads at runtime do not need root. UID 1000 is the
# UID Hugging Face Spaces runs Docker containers as.
RUN useradd --create-home --uid 1000 appuser \
    && mkdir -p /code/hf_cache \
    && chown -R appuser:appuser /code

# Copy the application source (main.py and friends). Without this step the
# image contains no module for `uvicorn main:app` to import. Use a
# .dockerignore to keep .git, caches, and local secrets out of the context.
COPY --chown=appuser:appuser . .

# Drop root for the running service.
USER appuser

# Port the API listens on (documentation only; publish it with -p / the platform config).
EXPOSE 7860

# Start the FastAPI app; exec form so uvicorn is PID 1 and receives SIGTERM directly.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]