ruslanmv committed
Commit b1744c8 · verified · 1 Parent(s): 40e0f8e

Update app.py

Files changed (1):
  app.py (+94, -120)
app.py CHANGED
@@ -1,51 +1,16 @@
- import os
  import gradio as gr

- # ------------------------------------------------------------------------------
- # Environment and Model/Client Initialization
- # ------------------------------------------------------------------------------
- # Try to import google.colab to decide whether to load a local model or use InferenceClient.
- try:
-     from google.colab import userdata  # In Colab, use local model inference.
-     HF_TOKEN = userdata.get('HF_TOKEN')
-     import torch
-     from transformers import AutoTokenizer, AutoModelForCausalLM
-
-     # Small performance tweak if your input sizes remain similar.
-     torch.backends.cudnn.benchmark = True
-
-     model_name = "HuggingFaceH4/zephyr-7b-beta"
-     # Pass token if required for private models.
-     model = AutoModelForCausalLM.from_pretrained(
-         model_name,
-         use_auth_token=HF_TOKEN,
-         torch_dtype=torch.bfloat16,
-         device_map="auto"
-     )
-     # Optionally compile the model for extra speed if using PyTorch 2.0+
-     if hasattr(torch, "compile"):
-         model = torch.compile(model)
-
-     tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=HF_TOKEN)
-     inference_mode = "local"
-
- except ImportError:
-     # Not in Google Colab – use the Hugging Face InferenceClient.
-     HF_TOKEN = os.getenv("HF_TOKEN")
-     if not HF_TOKEN:
-         raise ValueError("HF_TOKEN environment variable not set")
-     from huggingface_hub import InferenceClient
-     from transformers import AutoTokenizer
-
-     model_name = "HuggingFaceH4/zephyr-7b-beta"
-     tokenizer = AutoTokenizer.from_pretrained(model_name)
-     # Pass the token to the client to avoid authentication errors.
-     client = InferenceClient(model_name, token=HF_TOKEN)
-     inference_mode = "client"
-
- # ------------------------------------------------------------------------------
- # SYSTEM PROMPT (PATIENT ROLE)
- # ------------------------------------------------------------------------------
  nvc_prompt_template = """You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
  BEHAVIOR INSTRUCTIONS:
  - You will respond ONLY as this user/patient.
@@ -65,102 +30,111 @@ BEHAVIOR INSTRUCTIONS:
  - Keep your responses concise, aiming for a maximum of {max_response_words} words.
  Start the conversation by expressing your current feelings or challenges from the patient's point of view."""

- # ------------------------------------------------------------------------------
- # Utility Functions
- # ------------------------------------------------------------------------------
- def build_prompt(history: list[tuple[str, str]], system_message: str, message: str, max_response_words: int) -> str:
-     """
-     Build a text prompt that starts with the system message (with a max word limit),
-     followed by the conversation history (with "Doctor:" and "Patient:" lines), and
-     ends with a new "Doctor:" line prompting the patient to reply.
-     """
-     prompt = system_message.format(max_response_words=max_response_words) + "\n"
-     for user_msg, assistant_msg in history:
-         prompt += f"Doctor: {user_msg}\n"
-         if assistant_msg:
-             prompt += f"Patient: {assistant_msg}\n"
-     prompt += f"Doctor: {message}\nPatient: "
-     return prompt

- def truncate_response(text: str, max_words: int) -> str:
-     """
-     Truncate the response text to the specified maximum number of words.
-     """
      words = text.split()
      if len(words) > max_words:
-         return " ".join(words[:max_words]) + "..."
      return text

- # ------------------------------------------------------------------------------
- # Response Function
- # ------------------------------------------------------------------------------
  def respond(
-     message: str,
      history: list[tuple[str, str]],
-     system_message: str,
-     max_tokens: int,
-     temperature: float,
-     top_p: float,
-     max_response_words: int,
  ):
-     """
-     Generate a response based on the built prompt.
-     If running locally (in Colab), use the loaded model; otherwise, use InferenceClient.
-     """
-     prompt = build_prompt(history, system_message, message, max_response_words)
-
-     if inference_mode == "local":
-         # Tokenize the prompt and generate a response using the local model.
-         input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
-         output_ids = model.generate(
-             input_ids,
-             max_new_tokens=max_tokens,
-             do_sample=True,
-             temperature=temperature,
-             top_p=top_p,
-         )
-         full_generated_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
-         generated_response = full_generated_text[len(prompt):].strip()
-         final_response = truncate_response(generated_response, max_response_words)
-         return final_response
-     else:
-         # Use InferenceClient to generate a response.
-         response = client.text_generation(
-             prompt,
-             max_new_tokens=max_tokens,
-             do_sample=True,
              temperature=temperature,
              top_p=top_p,
-         )
-         full_generated_text = response[0]['generated_text']
-         generated_response = full_generated_text[len(prompt):].strip()
-         final_response = truncate_response(generated_response, max_response_words)
-         return final_response
-
- # ------------------------------------------------------------------------------
- # Optional Initial Message and Gradio Interface
- # ------------------------------------------------------------------------------
  initial_user_message = (
-     "I’m sorry you’ve been feeling overwhelmed. Could you tell me more "
-     "about your arguments with your partner and how that’s affecting you?"
  )

  demo = gr.ChatInterface(
      fn=respond,
      additional_inputs=[
          gr.Textbox(value=nvc_prompt_template, label="System message", visible=True),
-         gr.Slider(minimum=1, maximum=2048, value=256, step=1, label="Max new tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
-         gr.Slider(minimum=10, maximum=200, value=100, step=10, label="Max response words"),
      ],
      title="Patient Interview Practice Chatbot",
-     description=(
-         "Simulate a patient interview. You (the user) act as the doctor, "
-         "and the chatbot replies with the patient's perspective only."
-     ),
  )

  if __name__ == "__main__":
-     demo.launch(share=True)
 
 
app.py (after the change; added lines are marked with +):

  import gradio as gr
+ from huggingface_hub import InferenceClient
+ from transformers import AutoTokenizer

+ # Import the tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+ client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+
+ # Define a maximum context length (tokens). Check your model's documentation!
+ MAX_CONTEXT_LENGTH = 4096  # Example: adjust based on your model
+ MAX_RESPONSE_WORDS = 100  # Define the maximum words for patient responses
+
+ ################################# SYSTEM PROMPT (PATIENT ROLE) #################################
  nvc_prompt_template = """You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
  BEHAVIOR INSTRUCTIONS:
  - You will respond ONLY as this user/patient.

  - Keep your responses concise, aiming for a maximum of {max_response_words} words.
  Start the conversation by expressing your current feelings or challenges from the patient's point of view."""

+ def count_tokens(text: str) -> int:
+     """Counts the number of tokens in a given string."""
+     return len(tokenizer.encode(text))
+
+ def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
+     """Truncates the conversation history to fit within the maximum token limit."""
+     truncated_history = []
+     system_message_tokens = count_tokens(system_message)
+     current_length = system_message_tokens
+
+     # Iterate backwards through the history (newest to oldest)
+     for user_msg, assistant_msg in reversed(history):
+         user_tokens = count_tokens(user_msg) if user_msg else 0
+         assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
+         turn_tokens = user_tokens + assistant_tokens
+         if current_length + turn_tokens <= max_length:
+             truncated_history.insert(0, (user_msg, assistant_msg))  # Add to the beginning
+             current_length += turn_tokens
+         else:
+             break  # Stop adding turns if we exceed the limit
+     return truncated_history

+ def truncate_response_words(text: str, max_words: int) -> str:
+     """Truncates a text to a maximum number of words."""
      words = text.split()
      if len(words) > max_words:
+         return " ".join(words[:max_words]) + "..."  # Add ellipsis to indicate truncation
      return text

+
  def respond(
+     message,
      history: list[tuple[str, str]],
+     system_message,
+     max_tokens,
+     temperature,
+     top_p,
+     max_response_words_param,  # Pass max_response_words as parameter
  ):
+     """Responds to a user message, maintaining conversation history."""
+     # Use the system prompt that instructs the LLM to behave as the patient
+     formatted_system_message = system_message.format(max_response_words=max_response_words_param)
+
+     # Truncate history to fit within max tokens
+     truncated_history = truncate_history(
+         history,
+         formatted_system_message,
+         MAX_CONTEXT_LENGTH - max_tokens - 100  # Reserve some space
+     )
+
+     # Build the messages list with the system prompt first
+     messages = [{"role": "system", "content": formatted_system_message}]
+
+     # Replay truncated conversation
+     for user_msg, assistant_msg in truncated_history:
+         if user_msg:
+             messages.append({"role": "user", "content": f"<|user|>\n{user_msg}</s>"})
+         if assistant_msg:
+             messages.append({"role": "assistant", "content": f"<|assistant|>\n{assistant_msg}</s>"})
+
+     # Add the latest user query
+     messages.append({"role": "user", "content": f"<|user|>\n{message}</s>"})
+
+     response = ""
+     try:
+         # Generate response from the LLM, streaming tokens
+         for chunk in client.chat_completion(
+             messages,
+             max_tokens=max_tokens,
+             stream=True,
              temperature=temperature,
              top_p=top_p,
+         ):
+             token = chunk.choices[0].delta.content
+             response += token
+
+             truncated_response = truncate_response_words(response, max_response_words_param)  # Truncate response to word limit
+             yield truncated_response
+
+     except Exception as e:
+         print(f"An error occurred: {e}")
+         yield "I'm sorry, I encountered an error. Please try again."
+
+ # OPTIONAL: An initial user message (the LLM "as user") if desired
  initial_user_message = (
+     "I really don’t know where to begin… I feel overwhelmed lately. "
+     "My neighbors keep playing loud music, and I’m arguing with my partner about money. "
+     "Also, two of my friends are fighting, and the group is drifting apart. "
+     "I just feel powerless."
  )

+ # --- Gradio Interface ---
  demo = gr.ChatInterface(
      fn=respond,
      additional_inputs=[
          gr.Textbox(value=nvc_prompt_template, label="System message", visible=True),
+         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
          gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+         gr.Slider(minimum=10, maximum=200, value=MAX_RESPONSE_WORDS, step=10, label="Max response words"),  # Slider for max words
      ],
+     # You can optionally set 'title' or 'description' to show some info in the UI:
      title="Patient Interview Practice Chatbot",
+     description="Practice medical interviews with a patient simulator. Ask questions and the patient will respond based on their defined persona and emotional challenges.",
  )

  if __name__ == "__main__":
+     demo.launch()
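
A minimal usage sketch of the token-budget truncation this commit introduces. It assumes the new file above is saved as app.py, is importable as `app`, and that its dependencies (including downloading the HuggingFaceH4/zephyr-7b-beta tokenizer) are available; the module name and the sample dialogue are illustrative assumptions, not part of the commit.

  # Sketch only: shows that truncate_history keeps the newest turns and drops
  # the oldest ones once the token budget is exhausted.
  from app import count_tokens, truncate_history

  system = "You are the patient."
  history = [
      ("How have you been sleeping?", "Not well, I keep waking up at night."),
      ("What do you think is causing that?", "Mostly the arguments with my partner."),
  ]

  # A budget that only fits the system prompt plus the most recent turn,
  # so the oldest exchange is dropped and the newest one is kept.
  budget = count_tokens(system) + count_tokens(history[-1][0]) + count_tokens(history[-1][1])
  print(truncate_history(history, system, budget))  # prints only the second (newest) turn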