Spaces:
Paused
Paused
feat(runner.sh): --enable-chunked-prefill and --enable-prefix-caching for faster generate
Browse files
runner.sh
CHANGED
@@ -52,4 +52,6 @@ python -u /app/openai_compatible_api_server.py \
 --max-model-len 32768 \
 --dtype float16 \
 --enforce-eager \
---gpu-memory-utilization 0.9
+--gpu-memory-utilization 0.9 \
+--enable-chunked-prefill \
+--enable-prefix-caching