File size: 407 Bytes
5822c0a
 
 
4758518
 
 
 
5822c0a
 
 
 
a9665f3
4758518
3fe0a2e
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#!/bin/bash
#
# Start `vllm serve` for a single model with a fixed set of serving flags.
#
# Optional environment overrides:
#   MODEL  model identifier handed to `vllm serve`
#          (default: OpenGVLab/InternVL2_5-8B)
#   PORT   value handed to --port (default: 8000)
#
# Usage: ./this_script.sh
#        MODEL=some/other-model PORT=8001 ./this_script.sh

set -euo pipefail

# Defaults match the previously hard-coded values; an already-set, non-empty
# environment variable of the same name takes precedence.
MODEL="${MODEL:-OpenGVLab/InternVL2_5-8B}"
PORT="${PORT:-8000}"

# Optional environment tweaks, disabled by default. Uncomment as needed.
# export CUDA_DEVICE_ORDER="PCI_BUS_ID"
# export NCCL_P2P_DISABLE=1
# export CUDA_VISIBLE_DEVICES="0"
# export PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

# Fail early with a readable message instead of a bare "command not found".
if ! command -v vllm >/dev/null 2>&1; then
  printf 'error: vllm executable not found in PATH\n' >&2
  exit 127
fi

# Arguments live in an array so every value is passed as exactly one word,
# regardless of spaces or glob characters in MODEL/PORT.
serve_args=(
  --port "$PORT"
  --trust-remote-code
  --limit-mm-per-prompt image=4
  --max-model-len 8192
  --gpu-memory-utilization 0.97
  --disable-log-requests
)

# exec replaces this shell with the server process, so signals (e.g. SIGTERM
# from a supervisor) and the exit status go straight to/from vllm.
exec vllm serve "$MODEL" "${serve_args[@]}"