Spaces:

bobpopboom
/

testing

Sleeping

File size: 2,383 Bytes

432cd4a
f43b68f
3f3da62
432cd4a
abe2d0f
 
e929713
f43b68f
abe2d0f
 
f43b68f
 
 
 
 
 
 
 
 
 
 
 
 
 
abe2d0f
e929713
 
 
 
 
432cd4a
 
 
e929713
432cd4a
 
 
 
14ddf0d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
432cd4a
3f3da62
e929713
432cd4a
 
 
e929713
 
 
 
432cd4a
 
 
 
 
 
 
 
 
 
 
 
 
e929713

import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import torch

# Hugging Face Hub id of the causal language model served by this app.
model_id = "thrishala/mental_health_chatbot"

try:
    # Load the weights on CPU in half precision. This is a dtype choice that
    # reduces memory use; it is NOT int8 quantization.
    # NOTE(review): float16 kernels on CPU can be slow or missing on older
    # PyTorch builds — confirm against the deployed torch version.
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="cpu",
        torch_dtype=torch.float16,  # half precision for reduced memory
        low_cpu_mem_usage=True,     # memory-optimized loading path
    )

    # Tokenizer that matches the model checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Text-generation pipeline wrapping the already-loaded model and tokenizer.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        torch_dtype=torch.float16,
    )

except Exception as e:
    # The app cannot run without the model: report the cause and stop with a
    # non-zero status so the failure is visible to whatever launched us.
    # (`exit()` is a `site` convenience that exits with status 0.)
    print(f"Error loading model: {e}")
    raise SystemExit(1) from e

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,  # You can use this for initial instructions
    max_tokens,
    temperature,
    top_p,
):
    """Generate the assistant's reply to ``message`` and yield it once.

    The prompt is the system message followed by the prior conversation
    rendered as ``User:`` / ``Assistant:`` lines, ending with an open
    ``Assistant:`` cue for the model to complete.

    Args:
        message: The user's latest message.
        history: Prior turns, either as ``(user, assistant)`` pairs or as
            ``{"role": ..., "content": ...}`` dicts. Dicts whose role is
            neither ``"user"`` nor ``"assistant"`` are skipped.
        system_message: Instruction text placed at the top of the prompt.
        max_tokens: Upper bound on newly generated tokens (coerced to int).
        temperature: Sampling temperature forwarded to the pipeline.
        top_p: Nucleus-sampling threshold forwarded to the pipeline.

    Yields:
        The stripped reply text, or a fixed error message if generation
        raises.
    """
    role_labels = {"user": "User", "assistant": "Assistant"}

    # Build the prompt from pieces and join once at the end.
    parts = [f"{system_message}\n"]
    for turn in history or []:
        if isinstance(turn, dict):
            label = role_labels.get(turn.get("role"))
            if label is not None:
                parts.append(f"{label}: {turn.get('content')}\n")
        else:
            user_msg, bot_msg = turn
            parts.append(f"User: {user_msg}\nAssistant: {bot_msg}\n")
    parts.append(f"User: {message}\nAssistant:")
    prompt = "".join(parts)

    try:
        generated = pipe(
            prompt,
            max_new_tokens=int(max_tokens),  # UI sliders may deliver floats
            do_sample=True,  # without sampling, temperature/top_p are ignored
            temperature=temperature,
            top_p=top_p,
            return_full_text=False,  # only the continuation, not the prompt
        )[0]["generated_text"]
    except Exception as e:
        print(f"Error during generation: {e}")
        yield "An error occurred during generation."  # Handle generation errors.
        return

    # The model may keep going and invent the next user turn; keep only the
    # text before the next "User:" marker.
    bot_response = generated.split("User:", 1)[0].strip()
    yield bot_response


# 4. Gradio interface.
# The extra input widgets are created in the same order as the trailing
# parameters of `respond`: system_message, max_tokens, temperature, top_p.
_system_message_box = gr.Textbox(
    label="System message",
    value="You are a friendly and helpful mental health chatbot.",
)
_max_tokens_slider = gr.Slider(
    label="Max new tokens",
    minimum=1,
    maximum=2048,
    step=1,
    value=512,
)
_temperature_slider = gr.Slider(
    label="Temperature",
    minimum=0.1,
    maximum=4.0,
    step=0.1,
    value=0.7,
)
_top_p_slider = gr.Slider(
    label="Top-p (nucleus sampling)",
    minimum=0.1,
    maximum=1.0,
    step=0.05,
    value=0.95,
)

# Chat UI that calls `respond` for every user message.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        _system_message_box,
        _max_tokens_slider,
        _temperature_slider,
        _top_p_slider,
    ],
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()