# Mental_Health/app.py
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
device = "cuda" if torch.cuda.is_available() else "cpu" # Automatically detect GPU or CPU
model_name = "tanusrich/Mental_Health_Chatbot"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,                 # Reduce memory usage
    device_map=device,                         # Place the model on the GPU if available, otherwise the CPU
    low_cpu_mem_usage=True,
    max_memory={0: "3.5GiB", "cpu": "12GiB"},  # Per-device memory budgets (GPU 0 / CPU)
    offload_folder=None,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
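# Note: like many causal-LM tokenizers, this one may not define a dedicated pad
# token; generate() below reuses the EOS token id for padding instead.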
# Optional: export the model and tokenizer to a local folder (left commented out).
'''
model_save_path = "./model"
model.save_pretrained(model_save_path)      # Save model
tokenizer.save_pretrained(model_save_path)  # Save tokenizer
'''
def generate_response(user_input, history=None):
    # gr.ChatInterface calls this with (message, history); history is not folded
    # into the prompt here, so the model only sees the latest message.
    inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=150,
            do_sample=True,        # Sampling must be enabled for temperature/top_k/top_p to take effect
            temperature=0.7,
            top_k=50,
            top_p=0.9,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(output[0], skip_special_tokens=True)
    # Extract only the chatbot's latest response
    chatbot_response = response.split("Chatbot:")[-1].strip()
    return chatbot_response
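# Hypothetical sketch (not part of the original app): one way to fold the prior
# turns that gr.ChatInterface passes as `history` into the prompt, assuming the
# default pair-style history and a "User:"/"Chatbot:" turn format. Both are
# assumptions about how the model expects conversations to be framed, so this
# stays commented out.
'''
def generate_response_with_history(user_input, history):
    prompt = ""
    for user_turn, bot_turn in history or []:
        prompt += f"User: {user_turn}\nChatbot: {bot_turn}\n"
    prompt += f"User: {user_input}\nChatbot:"
    return generate_response(prompt)
'''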
# Alternative: continuous command-line conversation loop (left commented out;
# the Gradio interface below is used instead).
'''
while True:
    user_input = input("You: ")  # Take user input
    if user_input.lower() in ["exit", "quit", "stop"]:
        print("Chatbot: Goodbye!")
        break
    response = generate_response(user_input)
    print("Chatbot:", response)
'''
# Initialize the ChatInterface
chatbot = gr.ChatInterface(fn=generate_response, title="Mental Health Chatbot")
chatbot.launch()
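# launch() serves the app on Hugging Face Spaces; run locally, Gradio listens on
# http://127.0.0.1:7860 by default.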
'''
# Example
user_input = "I'm feeling suicidal."
response = generate_response(user_input)
print("Chatbot: ", response)
'''