ruslanmv committed (verified)
Commit c456c47 · 1 Parent(s): 14366b9

Update app.py

Files changed (1)
  1. app.py +58 -18
app.py CHANGED
@@ -6,26 +6,33 @@ from transformers import AutoTokenizer # Import the tokenizer
 tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 
-# Define a maximum context length (tokens). Check your model's documentation!
-MAX_CONTEXT_LENGTH = 4096 # Example: Adjust this based on your model!
+# Define a maximum context length (tokens). Check your model's documentation!
+MAX_CONTEXT_LENGTH = 4096 # Example: Adjust based on your model
 
-# Read the default prompt from a file
-with open("prompt.txt", "r") as file:
-    nvc_prompt_template = file.read()
+################################
+# SYSTEM PROMPT (PATIENT ROLE) #
+################################
+# This is the core text that tells the LLM to behave as the "patient."
+# You can store it in "prompt.txt" or include it directly here as a string.
+nvc_prompt_template = """
+You are now taking on the role of a single user (a “patient”) seeking support for various personal and emotional challenges.
+
+BEHAVIOR INSTRUCTIONS:
+- You will respond ONLY as this user/patient.
+- You will speak in the first person about your own situations, feelings, and worries.
+- You will NOT provide counseling or solutions—your role is to share feelings, concerns, and perspectives.
+- You have multiple ongoing issues: conflicts with neighbors, career insecurities, arguments about money, feeling excluded at work, feeling unsafe in the classroom, and so on. You’re also experiencing sadness about two friends fighting and your friend group possibly falling apart.
+- Continue to speak from this user's perspective when the conversation continues.
+
+Start the conversation by expressing your current feelings or challenges from the patient's point of view.
+"""
 
 def count_tokens(text: str) -> int:
     """Counts the number of tokens in a given string."""
     return len(tokenizer.encode(text))
 
 def truncate_history(history: list[tuple[str, str]], system_message: str, max_length: int) -> list[tuple[str, str]]:
-    """Truncates the conversation history to fit within the maximum token limit.
-    Args:
-        history: The conversation history (list of user/assistant tuples).
-        system_message: The system message.
-        max_length: The maximum number of tokens allowed.
-    Returns:
-        The truncated history.
-    """
+    """Truncates the conversation history to fit within the maximum token limit."""
     truncated_history = []
     system_message_tokens = count_tokens(system_message)
     current_length = system_message_tokens
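
The rest of truncate_history is unchanged and therefore falls outside this hunk. For readers following along, here is a minimal sketch of how such a truncation loop typically continues; the variable names mirror the lines above, but the body is an assumption, not the file's actual code:

    # Hypothetical continuation (not part of this diff): walk the history from
    # newest to oldest and keep whole turns while they still fit.
    for user_msg, assistant_msg in reversed(history):
        turn_tokens = count_tokens(user_msg or "") + count_tokens(assistant_msg or "")
        if current_length + turn_tokens > max_length:
            break  # older turns no longer fit in the reserved context budget
        truncated_history.insert(0, (user_msg, assistant_msg))  # keep chronological order
        current_length += turn_tokens
    return truncated_history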
@@ -52,22 +59,33 @@ def respond(
     temperature,
     top_p,
 ):
-    """Responds to a user message, maintaining conversation history, using special tokens and message list."""
-    formatted_system_message = nvc_prompt_template
+    """Responds to a user message, maintaining conversation history."""
+    # Use the system prompt that instructs the LLM to behave as the patient
+    formatted_system_message = system_message
+
+    # Truncate history to fit within max tokens
+    truncated_history = truncate_history(
+        history,
+        formatted_system_message,
+        MAX_CONTEXT_LENGTH - max_tokens - 100 # Reserve some space
+    )
 
-    truncated_history = truncate_history(history, formatted_system_message, MAX_CONTEXT_LENGTH - max_tokens - 100) # Reserve space for the new message and some generation
+    # Build the messages list with the system prompt first
+    messages = [{"role": "system", "content": formatted_system_message}]
 
-    messages = [{"role": "system", "content": formatted_system_message}] # Start with system message
+    # Replay truncated conversation
     for user_msg, assistant_msg in truncated_history:
         if user_msg:
             messages.append({"role": "user", "content": f"<|user|>\n{user_msg}</s>"})
         if assistant_msg:
             messages.append({"role": "assistant", "content": f"<|assistant|>\n{assistant_msg}</s>"})
 
+    # Add the latest user query
     messages.append({"role": "user", "content": f"<|user|>\n{message}</s>"})
 
     response = ""
     try:
+        # Generate response from the LLM, streaming tokens
         for chunk in client.chat_completion(
             messages,
             max_tokens=max_tokens,
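
One caveat worth flagging on this hunk: client.chat_completion already takes structured role/content messages, and the serving side normally applies Zephyr's own chat template, so embedding <|user|> / </s> markers in the content by hand can end up duplicating them. If one instead wanted to render the template locally and stream raw text, a sketch along these lines would do it; apply_chat_template and text_generation are standard transformers / huggingface_hub calls, and the surrounding variables are the ones from the snippet above:

    # Sketch: build messages with plain text content, let the tokenizer render
    # Zephyr's chat template, then stream a raw text-generation request.
    prompt = tokenizer.apply_chat_template(
        messages,                    # same list, but without hand-written special tokens
        tokenize=False,
        add_generation_prompt=True,  # appends the assistant turn marker
    )
    for token in client.text_generation(
        prompt,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        response += token
        yield response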
@@ -82,15 +100,37 @@ def respond(
         print(f"An error occurred: {e}")
         yield "I'm sorry, I encountered an error. Please try again."
 
+###################
+# INITIAL MESSAGE #
+###################
+# This is how we make the LLM "start" by playing the role of the user/patient.
+# Essentially, we seed the conversation with an initial user message (from the LLM).
+initial_user_message = (
+    "I really don’t know where to begin… I feel so overwhelmed lately. "
+    "My neighbors keep playing loud music, I’m arguing with my partner about money, "
+    "and on top of that, two of my friends are in a fight and it’s splitting the group. "
+    "I just feel powerless in all these situations."
+)
+
 # --- Gradio Interface ---
+def start_conversation():
+    """Creates the initial chat state, so the LLM 'as user' starts talking."""
+    # Return a conversation with a single user message: the LLM’s "patient" message
+    return [("Hi there!", initial_user_message)]
+
 demo = gr.ChatInterface(
-    respond,
+    fn=respond,
     additional_inputs=[
         gr.Textbox(value=nvc_prompt_template, label="System message", visible=True),
         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
     ],
+    # We can initialize the conversation so the LLM starts with a 'user' message.
+    # Depending on your version of Gradio, you might set `default_message=...` or
+    # `submit_on_start=True`. Adjust as needed.
+    # Here we use a function that returns an initial conversation state:
+    chatbot = start_conversation()
 )
 
 if __name__ == "__main__":
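
The trailing comments in the new ChatInterface call leave the seeding mechanism open: `default_message` and `submit_on_start` are guesses in the commit itself, and passing the bare list from start_conversation() as chatbot may not be accepted. One concrete way to pre-fill the opening "patient" message, assuming a Gradio 4.x tuple-style chatbot, is to hand ChatInterface a pre-populated gr.Chatbot component; this is a sketch, not what the commit ships:

    # Sketch (assumes Gradio 4.x): seed the chat window with the patient's opening
    # line by passing a pre-filled Chatbot component instead of a bare list.
    demo = gr.ChatInterface(
        fn=respond,
        chatbot=gr.Chatbot(value=[(None, initial_user_message)]),  # shown as the bot's first turn
        additional_inputs=[
            gr.Textbox(value=nvc_prompt_template, label="System message", visible=True),
            gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
        ],
    )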
 