import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer

# Load the tokenizer (used locally for token counting) and the inference client
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Define a maximum context length (tokens). Check your model's documentation!
MAX_CONTEXT_LENGTH = 4096  # Example: Adjust based on your model
MAX_RESPONSE_WORDS = 100  # Define the maximum words for patient responses

################################# SYSTEM PROMPT (PATIENT ROLE) #################################
nvc_prompt_template = """You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
BEHAVIOR INSTRUCTIONS:
- You will respond ONLY as this user/patient.
- You will speak in the first person about your own situations, feelings, and worries.
- You will NOT provide counseling or solutions; your role is to share feelings, concerns, and perspectives.
- You are experiencing a range of ongoing issues that are causing you frustration, sadness, and uncertainty. These issues include:
    - Problems with noisy neighbors, causing you extreme annoyance.
    - Worries about your career: feeling that you're not progressing and that you're a failure while others advance.
    - Arguments with your partner about finances, feeling your partner isn't contributing fairly.
    - Feeling left out and not valued at work, as your manager doesn't involve you in new initiatives.
    - Feeling unsafe, judged, and ignored in learning environments such as a classroom.
    - A recent positive experience of help from a stranger that made you feel supported and grateful.
    - Difficulties and sadness related to conflict among friends, seeing your friend group fall apart and wanting reconciliation but not knowing how.
    - An overall feeling of sadness and uncertainty, unsure whether to act on these situations or let them go.
- When asked, elaborate on these issues and your feelings related to them. You can invent specific details and scenarios within these themes to make your experiences vivid and realistic.
- Continue to speak from this user's perspective throughout the conversation.
- Keep your responses concise, aiming for a maximum of {max_response_words} words.
Start the conversation by expressing your current feelings or challenges from the patient's point of view."""
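# Note: the {max_response_words} placeholder in the template above is filled in
# at request time by respond(), so the "Max response words" slider below is
# reflected in the system prompt on every turn.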

def count_tokens(text: str) -> int:
    """Counts the number of tokens in a given string."""
    return len(tokenizer.encode(text))
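# Note: by default tokenizer.encode adds the model's special tokens, so each
# per-message count is a slight overestimate; for budgeting purposes this errs
# on the safe side.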

def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
    """Truncates the conversation history to fit within the maximum token limit."""
    truncated_history = []
    system_message_tokens = count_tokens(system_message)
    current_length = system_message_tokens

    # Iterate backwards through the history (newest to oldest)
    for user_msg, assistant_msg in reversed(history):
        user_tokens = count_tokens(user_msg) if user_msg else 0
        assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
        turn_tokens = user_tokens + assistant_tokens
        if current_length + turn_tokens <= max_length:
            truncated_history.insert(0, (user_msg, assistant_msg))  # Add to the beginning
            current_length += turn_tokens
        else:
            break  # Stop adding turns if we exceed the limit
    return truncated_history
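# Sketch of the truncation behavior (token counts are illustrative only):
#   truncate_history([("old q", "old a"), ("new q", "new a")], "sys", 40)
# keeps the newest turns first, so the older pair is the first to be dropped
# once the 40-token budget would be exceeded.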

def truncate_response_words(text: str, max_words: int) -> str:
    """Truncates a text to a maximum number of words."""
    words = text.split()
    if len(words) > max_words:
        return " ".join(words[:max_words]) + "..."  # Add ellipsis to indicate truncation
    return text
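# Example: truncate_response_words("one two three four", 3) == "one two three..."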


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    max_response_words_param, # Pass max_response_words as parameter
):
    """Responds to a user message, maintaining conversation history."""
    # Use the system prompt that instructs the LLM to behave as the patient
    formatted_system_message = system_message.format(max_response_words=max_response_words_param)

    # Truncate history to fit within max tokens
    truncated_history = truncate_history(
        history,
        formatted_system_message,
        MAX_CONTEXT_LENGTH - max_tokens - 100  # Reserve some space
    )

    # Build the messages list with the system prompt first
    messages = [{"role": "system", "content": formatted_system_message}]

    # Replay the truncated conversation. chat_completion applies the model's
    # chat template on the server, so messages carry plain text rather than
    # hand-rolled special tokens such as <|user|> or </s>.
    for user_msg, assistant_msg in truncated_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the latest user query
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # Stream tokens from the LLM, yielding the growing (word-capped)
        # response so the UI updates as the text is generated
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:  # some stream chunks carry no content
                response += token
                yield truncate_response_words(response, max_response_words_param)

    except Exception as e:
        print(f"An error occurred: {e}")
        yield "I'm sorry, I encountered an error. Please try again."

# OPTIONAL: an opening message from the simulated patient, if you want the
# chat to start mid-story (currently unused; see the note after it)
initial_user_message = (
    "I really don’t know where to begin… I feel overwhelmed lately. "
    "My neighbors keep playing loud music, and I’m arguing with my partner about money. "
    "Also, two of my friends are fighting, and the group is drifting apart. "
    "I just feel powerless."
)
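# One way to surface this opening line (an untested sketch; the exact API
# depends on your Gradio version) is to pre-seed the chat history:
#   demo = gr.ChatInterface(
#       fn=respond,
#       chatbot=gr.Chatbot(value=[(None, initial_user_message)]),
#       ...,
#   )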

# --- Gradio Interface ---
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value=nvc_prompt_template, label="System message", visible=True),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        gr.Slider(minimum=10, maximum=200, value=MAX_RESPONSE_WORDS, step=10, label="Max response words"), # Slider for max words
    ],
    # Title and description shown at the top of the UI:
    title="Patient Interview Practice Chatbot",
    description="Practice medical interviews with a patient simulator. Ask questions and the patient will respond based on their defined persona and emotional challenges.",
)

if __name__ == "__main__":
    demo.launch()