Spaces:
Sleeping
Sleeping
Damien Benveniste
committed on
Commit
·
81d6e3d
1
Parent(s):
be09c5e
modified
Browse files- entrypoint.sh +11 -10
entrypoint.sh
CHANGED
@@ -10,17 +10,18 @@ MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
|
10 |
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
11 |
|
12 |
|
13 |
-
#
|
14 |
-
|
15 |
-
# echo "Error: API_KEY environment variable is not set"
|
16 |
-
# exit 1
|
17 |
-
# fi
|
18 |
-
|
19 |
-
# Run vLLM server
|
20 |
-
exec vllm serve $MODEL \
|
21 |
--dtype $DTYPE \
|
22 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
23 |
--max-num-seqs $MAX_NUM_SEQS \
|
24 |
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
25 |
-
--max-model-len $MAX_MODEL_LEN
|
26 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
11 |
|
12 |
|
13 |
+
# Construct the command
|
14 |
+
CMD="vllm serve $MODEL \
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
--dtype $DTYPE \
|
16 |
--max-num-batched-tokens $MAX_NUM_BATCHED_TOKENS \
|
17 |
--max-num-seqs $MAX_NUM_SEQS \
|
18 |
--gpu-memory-utilization $GPU_MEMORY_UTILIZATION \
|
19 |
+
--max-model-len $MAX_MODEL_LEN"
|
20 |
+
|
21 |
+
# Add enforce-eager only if it's set to true
|
22 |
+
if [ "$ENFORCE_EAGER" = "true" ]; then
|
23 |
+
CMD="$CMD --enforce-eager"
|
24 |
+
fi
|
25 |
+
|
26 |
+
# Execute the command
|
27 |
+
exec $CMD
|