bobber committed on
Commit
588eaad
·
verified ·
1 Parent(s): ee99a7d

Update app.py

Browse files

add quantization_config

Files changed (1) hide show
  1. app.py +7 -0
app.py CHANGED
@@ -5,6 +5,12 @@ from transformers import AutoProcessor, Llama4ForConditionalGeneration
5
 
6
  import torch
7
 
 
 
 
 
 
 
8
  #Qwen/Qwen2.5-14B-Instruct-1M
9
  #Qwen/Qwen2-0.5B
10
  # model_name = "bartowski/simplescaling_s1-32B-GGUF"
@@ -31,6 +37,7 @@ model = Llama4ForConditionalGeneration.from_pretrained(
31
  # gguf_file=filename,
32
  # cache_dir = cache_dir,
33
  torch_dtype=torch_dtype,
 
34
  device_map="auto",
35
  )
36
 
 
5
 
6
  import torch
7
 
8
+ from transformers import BitsAndBytesConfig
9
+ bnb_config = BitsAndBytesConfig(
10
+ load_in_4bit=True,
11
+ llm_int8_enable_fp32_cpu_offload=True,
12
+ )
13
+
14
  #Qwen/Qwen2.5-14B-Instruct-1M
15
  #Qwen/Qwen2-0.5B
16
  # model_name = "bartowski/simplescaling_s1-32B-GGUF"
 
37
  # gguf_file=filename,
38
  # cache_dir = cache_dir,
39
  torch_dtype=torch_dtype,
40
+ quantization_config=bnb_config,
41
  device_map="auto",
42
  )
43