Spaces:

bobpopboom
/

testing

Sleeping

bobpopboom committed on Feb 9

Commit

f43b68f

verified ·

1 Parent(s): 74acc14

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,20 +1,28 @@
 import gradio as gr
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import torch
 model_id = "thrishala/mental_health_chatbot"
 try:
-    quantization_config = BitsAndBytesConfig(
-        load_in_4bit=True,  # we going to 4 babey
-    )
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
-        quantization_config=quantization_config,
-        device_map="auto", #Use GPU if available
     )
-    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
 except Exception as e:
     print(f"Error loading model: {e}")

 import gradio as gr
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
 import torch
 model_id = "thrishala/mental_health_chatbot"
 try:
+    # Load model with int8 quantization for CPU
     model = AutoModelForCausalLM.from_pretrained(
         model_id,
+        device_map="cpu",
+        torch_dtype=torch.float16,  # Use float16 for reduced memory
+        low_cpu_mem_usage=True,     # Enable memory optimization
+    )
+    # Load tokenizer
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    # Create pipeline with optimizations
+    pipe = pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        torch_dtype=torch.float16,
     )
 except Exception as e:
     print(f"Error loading model: {e}")