ruslanmv committed
Commit 292065b · verified · 1 Parent(s): bf10870

Update app.py

Files changed (1)
  1. app.py +40 -52
app.py CHANGED
@@ -1,28 +1,27 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
-from transformers import AutoTokenizer # Import the tokenizer
+from transformers import AutoTokenizer
 
-# Use the appropriate tokenizer for your model.
+# Initialize the tokenizer and client.
 tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
-# Define a maximum context length (tokens). Check your model's documentation!
-MAX_CONTEXT_LENGTH = 4096 # Example: Adjust based on your model
-
-################################
-# SYSTEM PROMPT (PATIENT ROLE) #
-################################
-nvc_prompt_template = """
-You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
-
+# Define maximum context length (tokens); adjust based on your model.
+MAX_CONTEXT_LENGTH = 4096
+
+#################################
+# SYSTEM PROMPT (PATIENT ROLE) #
+#################################
+nvc_prompt_template = """You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
 BEHAVIOR INSTRUCTIONS:
 - You will respond ONLY as this user/patient.
 - You will speak in the first person about your own situations, feelings, and worries.
 - You will NOT provide counseling or solutions—your role is to share feelings, concerns, and perspectives.
-- You have multiple ongoing issues: conflicts with neighbors, career insecurities, arguments about money, feeling excluded at work, feeling unsafe in the classroom, and so on. You’re also experiencing sadness about two friends fighting and your friend group possibly falling apart.
+- You have multiple ongoing issues: conflicts with neighbors, career insecurities, arguments about money, feeling excluded at work, feeling unsafe in the classroom, etc.
+- You’re also experiencing sadness about two friends fighting and your friend group possibly falling apart.
 - Continue to speak from this user's perspective when the conversation continues.
-
-Start the conversation by expressing your current feelings or challenges from the patient's point of view.
+- Start the conversation by expressing your current feelings or challenges from the patient's point of view.
+- Your responses should be no more than 100 words.
 """
 
 def count_tokens(text: str) -> int:
@@ -30,60 +29,40 @@ def count_tokens(text: str) -> int:
     return len(tokenizer.encode(text))
 
 def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
-    """Truncates the conversation history to fit within the maximum token limit."""
+    """Truncates conversation history to fit within the token limit."""
     truncated_history = []
-    system_message_tokens = count_tokens(system_message)
-    current_length = system_message_tokens
-
-    # Iterate backwards through the history (newest to oldest)
+    current_length = count_tokens(system_message)
+    # Iterate backwards (newest first) and include turns until the limit is reached.
     for user_msg, assistant_msg in reversed(history):
         user_tokens = count_tokens(user_msg) if user_msg else 0
         assistant_tokens = count_tokens(assistant_msg) if assistant_msg else 0
         turn_tokens = user_tokens + assistant_tokens
-
         if current_length + turn_tokens <= max_length:
-            truncated_history.insert(0, (user_msg, assistant_msg)) # Add to the beginning
+            truncated_history.insert(0, (user_msg, assistant_msg))
             current_length += turn_tokens
         else:
-            break # Stop adding turns if we exceed the limit
-
+            break
     return truncated_history
 
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    """Responds to a user message, maintaining conversation history."""
-    # Use the system prompt that instructs the LLM to behave as the patient
+def respond(message, history: list[tuple[str, str]], system_message, max_tokens, temperature, top_p):
+    """
+    Generates a response from the patient chatbot.
+    It streams tokens from the LLM and stops once the response reaches 100 words.
+    """
     formatted_system_message = system_message
-
-    # Truncate history to fit within max tokens
-    truncated_history = truncate_history(
-        history,
-        formatted_system_message,
-        MAX_CONTEXT_LENGTH - max_tokens - 100 # Reserve some space
-    )
-
-    # Build the messages list with the system prompt first
+    truncated_history = truncate_history(history, formatted_system_message, MAX_CONTEXT_LENGTH - max_tokens - 100)
+
+    # Build the conversation messages with the system prompt first.
     messages = [{"role": "system", "content": formatted_system_message}]
-
-    # Replay truncated conversation
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
             messages.append({"role": "user", "content": f"<|user|>\n{user_msg}</s>"})
         if assistant_msg:
             messages.append({"role": "assistant", "content": f"<|assistant|>\n{assistant_msg}</s>"})
-
-    # Add the latest user query
     messages.append({"role": "user", "content": f"<|user|>\n{message}</s>"})
-
+
     response = ""
     try:
-        # Generate response from the LLM, streaming tokens
         for chunk in client.chat_completion(
             messages,
             max_tokens=max_tokens,
@@ -92,13 +71,23 @@ def respond(
             top_p=top_p,
         ):
             token = chunk.choices[0].delta.content
-            response += token
-            yield response
+            candidate = response + token
+            # If adding the token exceeds 100 words, trim and stop.
+            if len(candidate.split()) > 100:
+                allowed = 100 - len(response.split())
+                token_words = token.split()
+                token_trimmed = " ".join(token_words[:allowed])
+                response += token_trimmed
+                yield token_trimmed
+                break
+            else:
+                response = candidate
+                yield token
     except Exception as e:
         print(f"An error occurred: {e}")
         yield "I'm sorry, I encountered an error. Please try again."
 
-# OPTIONAL: An initial user message (the LLM "as user") if desired
+# OPTIONAL: An initial user message (if desired)
 initial_user_message = (
     "I really don’t know where to begin… I feel overwhelmed lately. "
     "My neighbors keep playing loud music, and I’m arguing with my partner about money. "
@@ -115,9 +104,8 @@ demo = gr.ChatInterface(
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
-    # You can optionally set 'title' or 'description' to show some info in the UI:
     title="NVC Patient Chatbot",
-    description="This chatbot behaves like a user/patient describing personal challenges."
+    description="This chatbot behaves like a user/patient describing personal challenges.",
 )
 
 if __name__ == "__main__":
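The core behavioral change in this commit is the 100-word cap applied while streaming. Below is a minimal standalone sketch of that trimming logic, assuming a hypothetical cap_words helper and a stubbed token stream in place of the live InferenceClient; it is an illustration, not part of app.py:

from typing import Iterable, Iterator

def cap_words(stream: Iterable[str], limit: int = 100) -> Iterator[str]:
    """Re-yield streamed chunks, trimming the last one so the cumulative
    word count never exceeds `limit` (mirrors the loop added to respond())."""
    response = ""
    for token in stream:
        candidate = response + token
        if len(candidate.split()) > limit:
            allowed = limit - len(response.split())
            yield " ".join(token.split()[:allowed])
            break
        response = candidate
        yield token

# Stubbed "LLM stream": one word per chunk.
chunks = (w + " " for w in ["word"] * 250)
print(len("".join(cap_words(chunks)).split()))  # 100

Note that len(candidate.split()) recounts every word on each chunk, which is quadratic in response length; harmless at a 100-word cap, but worth knowing before raising the limit.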
 
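For reference, the truncation budget passed to truncate_history is MAX_CONTEXT_LENGTH - max_tokens - 100: the context window minus the requested completion length, minus a 100-token safety margin. A runnable sketch of that arithmetic, with count_tokens stubbed as a whitespace split so it runs without downloading the Zephyr tokenizer:

MAX_CONTEXT_LENGTH = 4096

def count_tokens(text: str) -> int:
    return len(text.split())  # stand-in for len(tokenizer.encode(text))

def truncate_history(history, system_message, max_length):
    truncated, current = [], count_tokens(system_message)
    for user_msg, assistant_msg in reversed(history):  # newest turn first
        turn = count_tokens(user_msg or "") + count_tokens(assistant_msg or "")
        if current + turn <= max_length:
            truncated.insert(0, (user_msg, assistant_msg))  # keep chronological order
            current += turn
        else:
            break
    return truncated

max_tokens = 512
budget = MAX_CONTEXT_LENGTH - max_tokens - 100  # 3484 "tokens" of history
history = [("word " * 2000, "word " * 2000),  # oversized oldest turn
           ("old question", "old answer"),
           ("new question", "new answer")]
print(len(truncate_history(history, "system prompt", budget)))  # 2: the oversized oldest turn is dropped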