Spaces:

mknolan
/

internvl25-image-analyzer-debug

Runtime error

App Files Files Community

internvl25-image-analyzer-debug / Dockerfile

mknolan

Upload Dockerfile with huggingface_hub

82ce431 verified about 1 month ago

raw

history blame contribute delete

3.43 kB

	# CUDA 11.8 + cuDNN 8 runtime base image on Ubuntu 22.04.
	FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04

	# Build-time only: stops apt/dpkg from prompting during the image build.
	# Declared as ARG (not ENV) so the setting is visible to the RUN steps
	# below but is not baked into the running container's environment.
	ARG DEBIAN_FRONTEND=noninteractive

	# Runtime environment:
	#   PYTHONUNBUFFERED        - write Python stdout/stderr straight to the logs
	#   HF_HOME                 - root of the Hugging Face cache
	#   TRANSFORMERS_CACHE      - transformers model cache (inside HF_HOME)
	#   MPLCONFIGDIR            - writable Matplotlib config/cache directory
	#   PYTORCH_CUDA_ALLOC_CONF - tunes PyTorch's CUDA caching allocator: blocks
	#                             larger than 128 MB are not split, which limits
	#                             memory fragmentation. (This does not select a
	#                             distributed/NCCL backend.)
	ENV PYTHONUNBUFFERED=1 \
	    HF_HOME=/app/.cache/huggingface \
	    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
	    MPLCONFIGDIR=/tmp/matplotlib \
	    PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128

	# Pre-create the Hugging Face cache, the Matplotlib config directory and the
	# Gradio example cache, then make /app and /tmp/matplotlib world-writable
	# (presumably so the app can write to them when it does not run as root).
	RUN mkdir -p \
	        /app/.cache/huggingface/transformers \
	        /tmp/matplotlib \
	        /app/gradio_cached_examples \
	 && chmod -R 777 /app /tmp/matplotlib

	# System packages: compiler toolchain, CA certificates, curl, git, and the
	# Python 3 headers, pip and setuptools. The apt package index is deleted in
	# the same layer so it does not end up in the image.
	RUN apt-get update \
	 && apt-get install --yes --no-install-recommends \
	        build-essential \
	        ca-certificates \
	        curl \
	        git \
	        python3-dev \
	        python3-pip \
	        python3-setuptools \
	 && rm -rf /var/lib/apt/lists/*

	# Use /app as the working directory: relative paths in the COPY, RUN and
	# CMD instructions that follow are resolved against it.
	WORKDIR /app

	# Generate /entrypoint.sh, a bash wrapper that reports GPU status at startup
	# (nvidia-smi output if the tool is present, plus the CUDA/NVIDIA device
	# environment variables) and then exec's the container command so that the
	# command replaces the wrapper process.
	#
	# The script is written with printf, one argument per line, rather than
	# `echo '...\n...'`: whether echo expands "\n" depends on which shell
	# /bin/sh is, whereas printf '%s\n' behaves the same everywhere. The single
	# quotes keep ${...} and "$@" literal so they are evaluated when the
	# container starts, not at build time.
	RUN printf '%s\n' \
	        '#!/bin/bash' \
	        'echo "Checking NVIDIA GPU status..."' \
	        'if ! command -v nvidia-smi &> /dev/null; then' \
	        '    echo "WARNING: nvidia-smi command not found. NVIDIA driver might not be installed."' \
	        'else' \
	        '    echo "NVIDIA driver found. Running nvidia-smi:"' \
	        '    nvidia-smi' \
	        'fi' \
	        'echo "Environment variables for GPU:"' \
	        'echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}"' \
	        'echo "NVIDIA_VISIBLE_DEVICES=${NVIDIA_VISIBLE_DEVICES}"' \
	        'exec "$@"' \
	        > /entrypoint.sh \
	 && chmod +x /entrypoint.sh

	# Python dependencies.
	#
	# NOTE: requirements.txt is deliberately not copied before this step. Nothing
	# below reads it (every package is pinned inline), so copying it here only
	# invalidated this expensive layer whenever the file was edited. The file
	# still reaches the image through the later `COPY . .`.
	#
	# Packages are installed in separate pip invocations, in this order, to
	# avoid version conflicts:
	#   1. pip itself (upgrade)
	#   2. torch / torchvision built for CUDA 11.8 (from the PyTorch cu118 index)
	#   3. core numeric/HTTP libraries
	#   4. typing-extensions, pinned before the packages that depend on it
	#   5. Hugging Face stack
	#   6. timm (vision models)
	#   7. nest-asyncio (nested event loops)
	#   8. accelerate, then lmdeploy
	#   9. bitsandbytes
	#  10. gradio
	#  11. remaining utilities
	RUN pip3 install --no-cache-dir --upgrade pip \
	 && pip3 install --no-cache-dir \
	        torch==2.0.1+cu118 \
	        torchvision==0.15.2+cu118 \
	        --extra-index-url https://download.pytorch.org/whl/cu118 \
	 && pip3 install --no-cache-dir \
	        numpy==1.24.3 \
	        scipy==1.11.3 \
	        requests==2.31.0 \
	 && pip3 install --no-cache-dir typing-extensions==4.10.0 \
	 && pip3 install --no-cache-dir \
	        transformers==4.37.2 \
	        safetensors==0.4.1 \
	        huggingface_hub==0.19.4 \
	 && pip3 install --no-cache-dir timm==0.9.11 \
	 && pip3 install --no-cache-dir nest-asyncio==1.5.8 \
	 && pip3 install --no-cache-dir accelerate==0.30.0 \
	 && pip3 install --no-cache-dir lmdeploy==0.5.3 \
	 && pip3 install --no-cache-dir bitsandbytes==0.41.3 \
	 && pip3 install --no-cache-dir gradio==3.38.0 \
	 && pip3 install --no-cache-dir \
	        packaging==23.2 \
	        pyyaml==6.0.1 \
	        tqdm==4.66.1 \
	        openai==1.6.1

	# Bring the rest of the build context (the application code) into /app.
	COPY . .

	# After the copy, ensure the Gradio example cache and the Hugging Face cache
	# directories exist under /app and are world-writable.
	RUN mkdir -p \
	        gradio_cached_examples \
	        .cache/huggingface/transformers \
	 && chmod -R 777 gradio_cached_examples .cache

	# Document the port the app is expected to serve on. EXPOSE is metadata
	# only; it does not publish the port by itself.
	EXPOSE 7860

	# Run the GPU status script first. It ends with `exec "$@"`, so the CMD
	# below replaces the script as the container's main process.
	ENTRYPOINT ["/entrypoint.sh"]

	# Default command handed to the entrypoint: start the application.
	# app.py is resolved relative to the working directory (/app).
	CMD ["python3", "app.py"]