ruslanmv committed
Commit bf10870 · verified · 1 Parent(s): 097b9b2

Create patient1.py

Files changed (1): patient1.py (+122, -0)
patient1.py ADDED
@@ -0,0 +1,122 @@
import gradio as gr
from huggingface_hub import InferenceClient
from transformers import AutoTokenizer  # Import the tokenizer

# Use the appropriate tokenizer for your model.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# Define a maximum context length (tokens). Check your model's documentation!
MAX_CONTEXT_LENGTH = 4096  # Example: Adjust based on your model
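
# Token budget sketch (illustrative numbers): with MAX_CONTEXT_LENGTH = 4096,
# the default 512-token completion and the 100-token safety margin used below
# leave 4096 - 512 - 100 = 3484 tokens for the system prompt plus history.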

################################
# SYSTEM PROMPT (PATIENT ROLE) #
################################
nvc_prompt_template = """
You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
BEHAVIOR INSTRUCTIONS:
- You will respond ONLY as this user/patient.
- You will speak in the first person about your own situations, feelings, and worries.
- You will NOT provide counseling or solutions—your role is to share feelings, concerns, and perspectives.
- You have multiple ongoing issues: conflicts with neighbors, career insecurities, arguments about money, feeling excluded at work, feeling unsafe in the classroom, and so on. You’re also experiencing sadness about two friends fighting and your friend group possibly falling apart.
- Continue to speak from this user's perspective when the conversation continues.
Start the conversation by expressing your current feelings or challenges from the patient's point of view.
"""

def count_tokens(text: str) -> int:
    """Counts the number of tokens in a given string."""
    return len(tokenizer.encode(text))
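
# Note: tokenizer.encode() may add special tokens (e.g. BOS), so these counts
# are slightly conservative, which is safe for budgeting purposes.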

def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
    """Truncates the conversation history to fit within the maximum token limit."""
    truncated_history = []
    system_message_tokens = count_tokens(system_message)
    current_length = system_message_tokens

    # Iterate backwards through the history (newest to oldest)
    for user_msg, assistant_msg in reversed(history):
        user_tokens = count_tokens(user_msg) if user_msg else 0
        assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
        turn_tokens = user_tokens + assistant_tokens

        if current_length + turn_tokens <= max_length:
            truncated_history.insert(0, (user_msg, assistant_msg))  # Add to the beginning
            current_length += turn_tokens
        else:
            break  # Stop adding turns if we exceed the limit

    return truncated_history
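
# Worked example (illustrative): with a 300-token system prompt, a 3484-token
# budget, and turns costing [900, 1500, 1200] tokens oldest-to-newest, the loop
# keeps the 1200-token turn (300 + 1200 = 1500), then the 1500-token turn
# (3000), then stops because 3000 + 900 would exceed 3484, dropping the oldest.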

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Responds to a user message, maintaining conversation history."""
    # Use the system prompt that instructs the LLM to behave as the patient
    formatted_system_message = system_message

    # Truncate history to fit within max tokens
    truncated_history = truncate_history(
        history,
        formatted_system_message,
        MAX_CONTEXT_LENGTH - max_tokens - 100,  # Reserve some space
    )

    # Build the messages list with the system prompt first
    messages = [{"role": "system", "content": formatted_system_message}]

    # Replay the truncated conversation. chat_completion() applies the model's
    # chat template server-side, so pass plain text instead of hand-wrapping
    # messages in template tokens such as <|user|> ... </s>.
    for user_msg, assistant_msg in truncated_history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the latest user query
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # Generate response from the LLM, streaming tokens
        for chunk in client.chat_completion(
            messages,
            max_tokens=max_tokens,
            stream=True,
            temperature=temperature,
            top_p=top_p,
        ):
            token = chunk.choices[0].delta.content
            if token:  # The delta content can be None (e.g. in the final chunk)
                response += token
                yield response
    except Exception as e:
        print(f"An error occurred: {e}")
        yield "I'm sorry, I encountered an error. Please try again."
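
# gr.ChatInterface accepts generator functions: each yielded (cumulative)
# string replaces the displayed reply, so the text streams in token by token.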

# OPTIONAL: An initial user message (the LLM "as user") if desired
initial_user_message = (
    "I really don’t know where to begin… I feel overwhelmed lately. "
    "My neighbors keep playing loud music, and I’m arguing with my partner about money. "
    "Also, two of my friends are fighting, and the group is drifting apart. "
    "I just feel powerless."
)
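
# Note: initial_user_message is defined but not wired into the UI. One option
# (untested sketch) would be to pre-seed the chat window, e.g.:
#   demo = gr.ChatInterface(..., chatbot=gr.Chatbot(value=[(initial_user_message, None)]))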

# --- Gradio Interface ---
demo = gr.ChatInterface(
    fn=respond,
    additional_inputs=[
        gr.Textbox(value=nvc_prompt_template, label="System message", visible=True),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
    # You can optionally set 'title' or 'description' to show some info in the UI:
    title="NVC Patient Chatbot",
    description="This chatbot behaves like a user/patient describing personal challenges.",
)

if __name__ == "__main__":
    demo.launch()
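
A minimal way to exercise respond() outside the UI, as a sketch: it assumes the file above is importable as patient1 and that the Hugging Face Inference API is reachable. Since respond() yields cumulative strings, the last yielded value is the full reply.

# smoke_test.py (hypothetical helper, not part of this commit)
from patient1 import respond, nvc_prompt_template

final = ""
for partial in respond(
    "Hello, what has been on your mind lately?",  # user message
    [],                                           # empty chat history
    nvc_prompt_template,                          # system prompt (patient role)
    max_tokens=256,
    temperature=0.7,
    top_p=0.95,
):
    final = partial  # keep the latest cumulative chunk

print(final)  # the complete streamed reply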