#!/usr/bin/env bash
#
# Start a vLLM server for the model named in MODEL, listening on PORT.
#
# Requires the `vllm` command to be available on PATH.
# Usage: run this script directly; it takes no arguments.

set -euo pipefail

# Model identifier passed to `vllm serve`.
readonly MODEL="OpenGVLab/InternVL2_5-8B"

# Optional environment settings, disabled by default. Uncomment as needed.
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# export NCCL_P2P_DISABLE=1
# export CUDA_VISIBLE_DEVICES="0"
# export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Port handed to `vllm serve --port`.
readonly PORT=8000

# Flags (see the vLLM CLI documentation for authoritative semantics):
#   --trust-remote-code             allow model-repository code to be executed
#   --limit-mm-per-prompt image=4   cap on image inputs per prompt
#   --max-model-len 8192            maximum context length for the model
#   --gpu-memory-utilization 0.97   GPU memory fraction setting for vLLM
#   --disable-log-requests          turn off per-request logging
# Each continuation backslash must be the last character on its line.
vllm serve "$MODEL" \
  --port "$PORT" \
  --trust-remote-code \
  --limit-mm-per-prompt image=4 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.97 \
  --disable-log-requests