Oleh Kuznetsov committed
Commit · 07f77e4 · 1 Parent(s): f7972c6

fixup! feat(rec): Fix prompt storage
app.py
CHANGED
@@ -14,6 +14,7 @@ VLLM_MODEL_NAME = os.getenv("VLLM_MODEL_NAME")
 VLLM_GPU_MEMORY_UTILIZATION = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION"))
 VLLM_MAX_SEQ_LEN = int(os.getenv("VLLM_MAX_SEQ_LEN"))
 HF_TOKEN = os.getenv("HF_TOKEN")
+VLLM_DTYPE = os.getenv("VLLM_DTYPE")

 # -------------------------------- HELPERS -------------------------------------
 def load_prompt(path: Path) -> str:
@@ -40,6 +41,7 @@ local_llm = LLM(
     gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
     hf_token=HF_TOKEN,
     enforce_eager=True,
+    dtype=VLLM_DTYPE,
 )

 json_schema = StructuredQueryRewriteResponse.model_json_schema()
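For context, the two hunks above read one extra environment variable and forward it to the vLLM engine. The sketch below reassembles the affected part of app.py roughly as it would look after this commit; it is a minimal sketch, not the full file. The model and max-length wiring, the body of load_prompt, and the example dtype values are not shown in the diff and are assumptions; the json_schema line is omitted because StructuredQueryRewriteResponse is defined elsewhere in the app.

import os
from pathlib import Path

from vllm import LLM  # vLLM offline-inference entry point

# Engine configuration comes entirely from environment variables.
VLLM_MODEL_NAME = os.getenv("VLLM_MODEL_NAME")
VLLM_GPU_MEMORY_UTILIZATION = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION"))
VLLM_MAX_SEQ_LEN = int(os.getenv("VLLM_MAX_SEQ_LEN"))
HF_TOKEN = os.getenv("HF_TOKEN")
VLLM_DTYPE = os.getenv("VLLM_DTYPE")  # new in this commit, e.g. "auto", "float16", "bfloat16"

# -------------------------------- HELPERS -------------------------------------
def load_prompt(path: Path) -> str:
    # Body assumed; the diff only shows the signature.
    return path.read_text(encoding="utf-8")

local_llm = LLM(
    model=VLLM_MODEL_NAME,                               # assumed wiring, not in the diff
    max_model_len=VLLM_MAX_SEQ_LEN,                      # assumed wiring, not in the diff
    gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
    hf_token=HF_TOKEN,
    enforce_eager=True,
    dtype=VLLM_DTYPE,                                    # added by this commit
)

With dtype exposed this way, the weight precision can be switched from the Space's environment settings (for example VLLM_DTYPE=bfloat16) without editing app.py.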