bobber committed on
Commit
5877ea3
·
verified ·
1 Parent(s): 32952ac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -3
app.py CHANGED
@@ -18,8 +18,8 @@ import torch
18
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
19
  # model_name = "simplescaling/s1.1-32B"
20
  # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
21
- # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
22
- model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
23
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
24
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
25
  cache_dir = "/data"
@@ -35,7 +35,7 @@ cache_dir = "/data"
35
  model = Llama4ForConditionalGeneration.from_pretrained(
36
  model_name,
37
  # default is eager attention
38
- attn_implementation="flex_attention",
39
  # gguf_file=filename,
40
  cache_dir = cache_dir,
41
  torch_dtype=torch_dtype,
 
18
  # filename = "simplescaling_s1-32B-Q4_K_S.gguf"
19
  # model_name = "simplescaling/s1.1-32B"
20
  # model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-GGUF"
21
+ model_name = "unsloth/Llama-4-Scout-17B-16E-Instruct-unsloth-bnb-4bit"
22
+ # model_name = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
23
  filename = "Llama-4-Scout-17B-16E-Instruct-UD-IQ2_XXS.gguf"
24
  torch_dtype = torch.bfloat16 # could be torch.float16 or torch.bfloat16 torch.float32 too
25
  cache_dir = "/data"
 
35
  model = Llama4ForConditionalGeneration.from_pretrained(
36
  model_name,
37
  # default is eager attention
38
+ # attn_implementation="flex_attention",
39
  # gguf_file=filename,
40
  cache_dir = cache_dir,
41
  torch_dtype=torch_dtype,