bobpopboom committed on
Commit
f43b68f
·
verified ·
1 Parent(s): 74acc14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -8
app.py CHANGED
@@ -1,20 +1,28 @@
1
  import gradio as gr
2
- from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
3
  import torch
4
 
5
  model_id = "thrishala/mental_health_chatbot"
6
 
7
  try:
8
- quantization_config = BitsAndBytesConfig(
9
- load_in_4bit=True, # we going to 4 babey
10
- )
11
- tokenizer = AutoTokenizer.from_pretrained(model_id)
12
  model = AutoModelForCausalLM.from_pretrained(
13
  model_id,
14
- quantization_config=quantization_config,
15
- device_map="auto", #Use GPU if available
 
 
 
 
 
 
 
 
 
 
 
 
16
  )
17
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
18
 
19
  except Exception as e:
20
  print(f"Error loading model: {e}")
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
3
  import torch
4
 
5
  model_id = "thrishala/mental_health_chatbot"
6
 
7
  try:
8
+ # Load model with int8 quantization for CPU
 
 
 
9
  model = AutoModelForCausalLM.from_pretrained(
10
  model_id,
11
+ device_map="cpu",
12
+ torch_dtype=torch.float16, # Use float16 for reduced memory
13
+ low_cpu_mem_usage=True, # Enable memory optimization
14
+ )
15
+
16
+ # Load tokenizer
17
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
18
+
19
+ # Create pipeline with optimizations
20
+ pipe = pipeline(
21
+ "text-generation",
22
+ model=model,
23
+ tokenizer=tokenizer,
24
+ torch_dtype=torch.float16,
25
  )
 
26
 
27
  except Exception as e:
28
  print(f"Error loading model: {e}")