yusufs committed on
Commit
8c5a84b
·
1 Parent(s): 5bd7bc7

feat(runner.sh): add --enable-chunked-prefill and --enable-prefix-caching for faster generation

Browse files
Files changed (1) hide show
  1. runner.sh +3 -1
runner.sh CHANGED
@@ -52,4 +52,6 @@ python -u /app/openai_compatible_api_server.py \
52
  --max-model-len 32768 \
53
  --dtype float16 \
54
  --enforce-eager \
55
- --gpu-memory-utilization 0.9
 
 
 
52
  --max-model-len 32768 \
53
  --dtype float16 \
54
  --enforce-eager \
55
+ --gpu-memory-utilization 0.9 \
56
+ --enable-chunked-prefill \
57
+ --enable-prefix-caching