Spaces:
Sleeping
Sleeping
Damien Benveniste
committed on
Commit
·
b2efd89
1
Parent(s):
7df3af0
modified
Browse files
- Dockerfile +3 -2
- entrypoint.sh +35 -35
Dockerfile
CHANGED
@@ -66,5 +66,6 @@
|
|
66 |
FROM vllm/vllm-openai:latest
|
67 |
ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
|
68 |
EXPOSE 8000
|
69 |
-
|
70 |
-
|
|
|
|
66 |
FROM vllm/vllm-openai:latest
|
67 |
ENV PATH="/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:$PATH"
|
68 |
EXPOSE 8000
|
69 |
+
WORKDIR /app
|
70 |
+
COPY entrypoint.sh /app/entrypoint.sh
|
71 |
+
ENTRYPOINT [ "/app/entrypoint.sh" ]
|
entrypoint.sh
CHANGED
@@ -2,40 +2,40 @@
|
|
2 |
|
3 |
# Default values
|
4 |
# MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
|
5 |
-
MODEL=${MODEL:-"EleutherAI/pythia-70m"}
|
6 |
-
DTYPE=${DTYPE:-"half"}
|
7 |
-
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
|
8 |
-
MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
|
9 |
-
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
|
10 |
-
MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
11 |
-
ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
12 |
|
13 |
-
# Disable usage stats via environment variable
|
14 |
-
export VLLM_DISABLE_USAGE_STATS=true
|
15 |
|
16 |
-
# Print environment for debugging
|
17 |
-
echo "Environment variables:"
|
18 |
-
env
|
19 |
|
20 |
-
# Create and set permissions for the config directory
|
21 |
-
CONFIG_DIR=${XDG_CONFIG_HOME:-"/tmp/config"}
|
22 |
|
23 |
-
if [ ! -d "$CONFIG_DIR" ]; then
|
24 |
-
|
25 |
-
fi
|
26 |
-
chmod -R 777 "$CONFIG_DIR"
|
27 |
-
echo "Permissions for $CONFIG_DIR:"
|
28 |
-
ls -la "$CONFIG_DIR"
|
29 |
|
30 |
-
# Check and set permissions for directories
|
31 |
-
for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
done
|
39 |
|
40 |
# # Construct the command
|
41 |
# CMD="vllm serve $MODEL \
|
@@ -53,12 +53,12 @@ done
|
|
53 |
# fi
|
54 |
|
55 |
|
56 |
-
|
57 |
--model EleutherAI/pythia-70m \
|
58 |
-
--gpu-memory-utilization 0.9
|
59 |
-
--max-model-len 200
|
60 |
|
61 |
|
62 |
-
# Execute the command
|
63 |
-
echo "Running command: $CMD"
|
64 |
-
exec $CMD
|
|
|
2 |
|
3 |
# Default values
|
4 |
# MODEL=${MODEL:-"microsoft/Phi-3-mini-4k-instruct"}
|
5 |
+
# MODEL=${MODEL:-"EleutherAI/pythia-70m"}
|
6 |
+
# DTYPE=${DTYPE:-"half"}
|
7 |
+
# MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-512}
|
8 |
+
# MAX_NUM_SEQS=${MAX_NUM_SEQS:-16}
|
9 |
+
# GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.85}
|
10 |
+
# MAX_MODEL_LEN=${MAX_MODEL_LEN:-512}
|
11 |
+
# ENFORCE_EAGER=${ENFORCE_EAGER:-true}
|
12 |
|
13 |
+
# # Disable usage stats via environment variable
|
14 |
+
# export VLLM_DISABLE_USAGE_STATS=true
|
15 |
|
16 |
+
# # Print environment for debugging
|
17 |
+
# echo "Environment variables:"
|
18 |
+
# env
|
19 |
|
20 |
+
# # Create and set permissions for the config directory
|
21 |
+
# CONFIG_DIR=${XDG_CONFIG_HOME:-"/tmp/config"}
|
22 |
|
23 |
+
# if [ ! -d "$CONFIG_DIR" ]; then
|
24 |
+
# mkdir -p "$CONFIG_DIR"
|
25 |
+
# fi
|
26 |
+
# chmod -R 777 "$CONFIG_DIR"
|
27 |
+
# echo "Permissions for $CONFIG_DIR:"
|
28 |
+
# ls -la "$CONFIG_DIR"
|
29 |
|
30 |
+
# # Check and set permissions for directories
|
31 |
+
# for dir in /tmp/huggingface /tmp/cache /tmp/numba_cache /tmp/outlines_cache /.config; do
|
32 |
+
# if [ ! -d "$dir" ]; then
|
33 |
+
# mkdir -p "$dir"
|
34 |
+
# fi
|
35 |
+
# chmod -R 777 "$dir"
|
36 |
+
# echo "Permissions for $dir:"
|
37 |
+
# ls -la "$dir"
|
38 |
+
# done
|
39 |
|
40 |
# # Construct the command
|
41 |
# CMD="vllm serve $MODEL \
|
|
|
53 |
# fi
|
54 |
|
55 |
|
56 |
+
python3 -m vllm.entrypoints.openai.api_server \
|
57 |
--model EleutherAI/pythia-70m \
|
58 |
+
--gpu-memory-utilization 0.9 \
|
59 |
+
--max-model-len 200
|
60 |
|
61 |
|
62 |
+
# # Execute the command
|
63 |
+
# echo "Running command: $CMD"
|
64 |
+
# exec $CMD
|