bobber committed
Commit ce4b3d4 · verified · 1 Parent(s): 45b64c8

Update app.py

Files changed (1): app.py (+15, -3)
app.py CHANGED
@@ -1,6 +1,8 @@
 import gradio as gr
 import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from transformers import AutoProcessor, Llama4ForConditionalGeneration
+
 import torch
 
 #Qwen/Qwen2.5-14B-Instruct-1M
@@ -14,14 +16,23 @@ filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
 torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
 cache_dir = "/data"
 
-model = AutoModelForCausalLM.from_pretrained(
+# model = AutoModelForCausalLM.from_pretrained(
+#     model_name,
+#     # subfolder=subfolder,
+#     gguf_file=filename,
+#     torch_dtype=torch_dtype,
+#     device_map="auto",
+#     cache_dir = cache_dir,
+# )
+model = Llama4ForConditionalGeneration.from_pretrained(
     model_name,
-    # subfolder=subfolder,
+    attn_implementation="flex_attention",
     gguf_file=filename,
     torch_dtype=torch_dtype,
     device_map="auto",
     cache_dir = cache_dir,
 )
+
 tokenizer = AutoTokenizer.from_pretrained(model_name
     , gguf_file=filename
     # , subfolder=subfolder
@@ -44,7 +55,8 @@ def generate(prompt, history):
     ]
     text = tokenizer.apply_chat_template(
         messages,
-        tokenize=False,
+        # tokenize=False,
+        tokenize=True,
         add_generation_prompt=True
     )
     model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
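
For reference, a minimal sketch of the two apply_chat_template modes the last hunk toggles between; the checkpoint name below is a stand-in for illustration, not the GGUF file this commit loads. With tokenize=False the call returns a formatted prompt string that still needs a separate tokenizer(...) pass; with tokenize=True it returns token IDs directly, so the unchanged tokenizer([text], ...) line that follows in app.py would now be receiving IDs rather than a string.

from transformers import AutoTokenizer

# Stand-in checkpoint for illustration only; the commit itself loads a
# Llama-4 GGUF file via Llama4ForConditionalGeneration.
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")

messages = [{"role": "user", "content": "Hello!"}]

# tokenize=False: returns one formatted prompt string, which is then
# encoded in a second step (the pattern the old code used).
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt")

# tokenize=True: returns token IDs directly (a list of ints, or a tensor
# when return_tensors="pt" is passed), with no second encoding pass.
input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
)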