Oleh Kuznetsov committed
Commit 07f77e4 · 1 Parent(s): f7972c6

fixup! feat(rec): Fix prompt storage

Files changed (1): app.py (+2 -0)
app.py CHANGED
@@ -14,6 +14,7 @@ VLLM_MODEL_NAME = os.getenv("VLLM_MODEL_NAME")
 VLLM_GPU_MEMORY_UTILIZATION = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION"))
 VLLM_MAX_SEQ_LEN = int(os.getenv("VLLM_MAX_SEQ_LEN"))
 HF_TOKEN = os.getenv("HF_TOKEN")
+VLLM_DTYPE = os.getenv("VLLM_DTYPE")
 
 # -------------------------------- HELPERS -------------------------------------
 def load_prompt(path: Path) -> str:
@@ -40,6 +41,7 @@ local_llm = LLM(
     gpu_memory_utilization=VLLM_GPU_MEMORY_UTILIZATION,
     hf_token=HF_TOKEN,
     enforce_eager=True,
+    dtype=VLLM_DTYPE,
 )
 
 json_schema = StructuredQueryRewriteResponse.model_json_schema()
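
For context, a minimal sketch of how the new setting flows into the vLLM constructor. The "auto" fallback, the numeric defaults, and the model/max_model_len arguments are illustrative assumptions, not part of this commit (hf_token is omitted here):

# Sketch only: shows the VLLM_DTYPE environment variable feeding LLM(dtype=...).
# Defaults ("auto", "4096", "0.9") are assumptions for illustration.
import os
from vllm import LLM

VLLM_DTYPE = os.getenv("VLLM_DTYPE", "auto")  # e.g. "auto", "float16", "bfloat16"

local_llm = LLM(
    model=os.getenv("VLLM_MODEL_NAME"),
    max_model_len=int(os.getenv("VLLM_MAX_SEQ_LEN", "4096")),
    gpu_memory_utilization=float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION", "0.9")),
    enforce_eager=True,
    dtype=VLLM_DTYPE,  # new in this commit: precision selected via environment
)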