# Mental_Health/app.py
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" if torch.cuda.is_available() else "cpu" # Automatically detect GPU or CPU
model_name = "tanusrich/Mental_Health_Chatbot"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,                 # Reduce memory usage
    device_map=device,                         # Place the model on the GPU if available, otherwise the CPU
    low_cpu_mem_usage=True,
    max_memory={0: "3.5GiB", "cpu": "12GiB"},  # Per-device memory budgets (GPU 0 / CPU)
    offload_folder=None,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
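# Note: like many causal-LM tokenizers, this one may not define a dedicated pad
# token; generate() below reuses the EOS token id for padding instead.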
# Optional: export the model and tokenizer to a local folder (left commented out).
'''
model_save_path = "./model"
model.save_pretrained(model_save_path)      # Save model
tokenizer.save_pretrained(model_save_path)  # Save tokenizer
'''
def generate_response(user_input, history=None):
    # gr.ChatInterface calls this with (message, history); history is not folded
    # into the prompt here, so the model only sees the latest message.
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,        # Sampling must be enabled for temperature/top_k/top_p to take effect
            temperature=0.7,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # Extract only the chatbot's latest response
    chatbot_response = response.split("Chatbot:")[-1].strip()
    return chatbot_response
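# Hypothetical sketch (not part of the original app): one way to fold the prior
# turns that gr.ChatInterface passes as `history` into the prompt, assuming the
# default pair-style history and a "User:"/"Chatbot:" turn format. Both are
# assumptions about how the model expects conversations to be framed, so this
# stays commented out.
'''
def generate_response_with_history(user_input, history):
    prompt = ""
    for user_turn, bot_turn in history or []:
        prompt += f"User: {user_turn}\nChatbot: {bot_turn}\n"
    prompt += f"User: {user_input}\nChatbot:"
    return generate_response(prompt)
'''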
# Alternative: continuous command-line conversation loop (left commented out;
# the Gradio interface below is used instead).
'''
while True:
    user_input = input("You: ")  # Take user input
    if user_input.lower() in ["exit", "quit", "stop"]:
        print("Chatbot: Goodbye!")
        break
    response = generate_response(user_input)
    print("Chatbot:", response)
'''
# Initialize the ChatInterface
chatbot = gr.ChatInterface(fn=generate_response, title="Mental Health Chatbot")
chatbot.launch()
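# launch() serves the app on Hugging Face Spaces; run locally, Gradio listens on
# http://127.0.0.1:7860 by default.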
'''
# Example
user_input = "I'm feeling suicidal."
response = generate_response(user_input)
print("Chatbot: ", response)
'''