Tonic committed
Commit 0ac061c (unverified)
1 parent: 9aaad03

add bnb config

Files changed (1)
  1. app.py +4 -4
app.py CHANGED
@@ -5,7 +5,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 import torch
 import os
 
-HF_TOKEN = os.environ.get("HF_TOKEN") # Make sure to set this in your Space secrets
+HF_TOKEN = os.environ.get("HF_TOKEN")
 
 title = """
 # Welcome to 🌟Tonic's🤖Command-A
@@ -19,8 +19,8 @@ model_id = "Tonic/c4ai-command-a-03-2025-4bit_fp4"
 quantization_config = BitsAndBytesConfig(
     load_in_4bit=True, # Enable 4-bit quantization
     bnb_4bit_quant_type="fp4", # Use FP4 quantization
-    bnb_4bit_use_double_quant=True, # Optional: double quantization for better precision
-    llm_int8_enable_fp32_cpu_offload=True # Allow CPU offloading for 32-bit modules
+    bnb_4bit_use_double_quant=True # Optional: double quantization for better precision
+    # llm_int8_enable_fp32_cpu_offload=True # Allow CPU offloading for 32-bit modules
 )
 
 # Load tokenizer and model
@@ -28,7 +28,7 @@ tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(
     model_id,
     quantization_config=quantization_config, # Apply quantization
-    device_map="auto", # Automatically map to available devices
+    # device_map="auto", # Automatically map to available devices
     torch_dtype=torch.bfloat16,
     token=HF_TOKEN
 )
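
The net effect of the commit: double quantization stays on, while llm_int8_enable_fp32_cpu_offload and device_map="auto" are commented out, so nothing is offloaded to CPU and the full 4-bit model has to fit in GPU memory. For context, here is a minimal sketch of the loading path as it stands after this commit. It assumes a single CUDA GPU with enough VRAM for the FP4 weights and an HF_TOKEN secret set on the Space; the generation call at the end is illustrative rather than copied from app.py:

```python
# Sketch of the post-commit loading path (assumptions: one CUDA GPU with
# enough VRAM for the 4-bit weights, and HF_TOKEN set in the Space secrets).
import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

HF_TOKEN = os.environ.get("HF_TOKEN")
model_id = "Tonic/c4ai-command-a-03-2025-4bit_fp4"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,               # quantize linear layers to 4 bits at load time
    bnb_4bit_quant_type="fp4",       # FP4 data type (the alternative is "nf4")
    bnb_4bit_use_double_quant=True,  # also quantize the quantization constants
)

tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    torch_dtype=torch.bfloat16,      # compute dtype for the non-quantized modules
    token=HF_TOKEN,
)

# Illustrative usage, not from app.py (assumes the tokenizer ships a chat template).
messages = [{"role": "user", "content": "Hello, what can you do?"}]
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)
output = model.generate(input_ids, max_new_tokens=128)
print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
```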