Spaces:
Paused
Paused
File size: 407 Bytes
5822c0a 4758518 5822c0a a9665f3 4758518 3fe0a2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 |
#!/bin/bash
# Launch `vllm serve` for the model named in MODEL, listening on PORT.
#
# Usage: run with no arguments. Edit MODEL / PORT below to change the
# served model or the listening port.

# Abort on command failure, on use of an unset variable, and when any
# stage of a pipeline fails.
set -euo pipefail

readonly MODEL="OpenGVLab/InternVL2_5-8B"
readonly PORT=8000

# Optional CUDA / NCCL / PyTorch environment overrides, disabled by default.
# Uncomment the ones needed on the target machine.
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# export NCCL_P2P_DISABLE=1
# export CUDA_VISIBLE_DEVICES="0"
# export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Expansions are quoted so the values are passed as single arguments even
# if they are later edited to contain spaces or glob characters.
# NOTE(review): flag spellings such as `--limit-mm-per-prompt image=4` and
# `--disable-log-requests` may differ between vLLM releases — confirm
# against the installed version's `vllm serve --help`.
vllm serve "$MODEL" \
  --port "$PORT" \
  --trust-remote-code \
  --limit-mm-per-prompt image=4 \
  --max-model-len 8192 \
  --gpu-memory-utilization 0.97 \
  --disable-log-requests
|