bobber committed on
Commit
79b9a75
·
verified ·
1 Parent(s): 9268605

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -34,7 +34,8 @@ cache_dir = "/data"
34
  # )
35
  model = Llama4ForConditionalGeneration.from_pretrained(
36
  model_name,
37
- attn_implementation="flex_attention",
 
38
  # gguf_file=filename,
39
  cache_dir = cache_dir,
40
  torch_dtype=torch_dtype,
@@ -83,11 +84,11 @@ def generate(prompt, history):
83
  inputs = processor.apply_chat_template(
84
  messages,
85
  add_generation_prompt=True,
86
- tokenize=True,
87
  return_dict=True,
88
  return_tensors="pt",
89
  ).to(gpu_model.device)
90
- outputs = gpu_model.generate(
91
  **inputs,
92
  max_new_tokens=512,
93
  )
 
34
  # )
35
  model = Llama4ForConditionalGeneration.from_pretrained(
36
  model_name,
37
+ # flex_attention is only needed for image
38
+ # attn_implementation="flex_attention",
39
  # gguf_file=filename,
40
  cache_dir = cache_dir,
41
  torch_dtype=torch_dtype,
 
84
  inputs = processor.apply_chat_template(
85
  messages,
86
  add_generation_prompt=True,
87
+ # tokenize=True,
88
  return_dict=True,
89
  return_tensors="pt",
90
  ).to(gpu_model.device)
91
+ outputs = model.generate(
92
  **inputs,
93
  max_new_tokens=512,
94
  )