s-a-malik committed
Commit b501b77 · 1 Parent(s): ffed90e
Files changed (1)
1. app.py +19 -28
app.py CHANGED
@@ -26,9 +26,10 @@ DESCRIPTION = """
 """
 
 EXAMPLES = [
-    ["What is the capital of France?", "You are a helpful assistant.", []],
-    ["Explain the theory of relativity in simple terms.", "You are an expert physicist explaining concepts to a layman.", []],
-    ["Write a short poem about artificial intelligence.", "You are a creative poet with a interest in technology.", []]
+    ["What is the capital of France?", "You are a helpful assistant."],
+    ["Who landed on the moon?", "You are a knowledgeable historian."],
+    ["Who is Yarin Gal?", "You are a helpful assistant."],
+    ["Explain the theory of relativity in simple terms.", "You are an expert physicist explaining concepts to a layman."],
 ]
 
 if torch.cuda.is_available():
@@ -90,22 +91,7 @@ class CustomStreamer(TextIteratorStreamer):
     # gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
     # input_ids = input_ids.to(model.device)
 
-    # streamer = CustomStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    # generation_kwargs = dict(
-    #     input_ids=input_ids,
-    #     max_new_tokens=max_new_tokens,
-    #     do_sample=True,
-    #     top_p=top_p,
-    #     top_k=top_k,
-    #     temperature=temperature,
-    #     repetition_penalty=repetition_penalty,
-    #     streamer=streamer,
-    #     output_hidden_states=True,
-    #     return_dict_in_generate=True,
-    # )
-
-    # thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    # thread.start()
+
 
     # se_highlighted_text = ""
     # acc_highlighted_text = ""
@@ -133,7 +119,6 @@ class CustomStreamer(TextIteratorStreamer):
 @spaces.GPU
 def generate(
     message: str,
-    chat_history: List[Tuple[str, str]],
     system_prompt: str,
     max_new_tokens: int = DEFAULT_MAX_NEW_TOKENS,
     temperature: float = 0.6,
@@ -144,8 +129,6 @@ def generate(
     conversation = []
     if system_prompt:
         conversation.append({"role": "system", "content": system_prompt})
-    for user, assistant in chat_history:
-        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
 
     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
@@ -200,7 +183,9 @@ def generate(
         se_highlighted_text += f" {se_new_highlighted_text}"
         acc_highlighted_text += f" {acc_new_highlighted_text}"
 
-    yield se_highlighted_text, acc_highlighted_text
+    # yield se_highlighted_text, acc_highlighted_text
+    return se_highlighted_text, acc_highlighted_text
+
 
 
 def highlight_text(text: str, uncertainty_score: float) -> str:
@@ -237,27 +222,33 @@ with gr.Blocks(title="Llama-2 7B Chat with Dual Probes", css="footer {visibility
 
     with gr.Row():
         generate_btn = gr.Button("Generate")
-    # add spacing between probes and titles for each output
+    # Add spacing between probes
+    gr.HTML("<br><br>")
+
     with gr.Row():
         with gr.Column():
+            # make a box
             title = gr.HTML("<h2>Semantic Uncertainty Probe</h2>")
             se_output = gr.HTML(label="Semantic Uncertainty Probe")
+
+        # Add spacing between columns
+        gr.HTML("<div style='width: 20px;'></div>")
+
         with gr.Column():
             title = gr.HTML("<h2>Accuracy Probe</h2>")
             acc_output = gr.HTML(label="Accuracy Probe")
 
-    chat_history = gr.State([])
-
     gr.Examples(
         examples=EXAMPLES,
-        inputs=[message, system_prompt, chat_history],
+        inputs=[message, system_prompt],
         outputs=[se_output, acc_output],
         fn=generate,
+
     )
 
     generate_btn.click(
         generate,
-        inputs=[message, system_prompt, chat_history, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
+        inputs=[message, system_prompt, max_new_tokens, temperature, top_p, top_k, repetition_penalty],
         outputs=[se_output, acc_output]
     )
 
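
Below is a minimal, self-contained sketch of the single-turn flow this commit leaves in place: generate() no longer takes chat_history and returns the two highlighted HTML strings once instead of yielding them. The real app.py also loads the model, tokenizer, and probes and applies the chat template; those parts are omitted, and the default values and placeholder outputs here are illustrative assumptions, not the actual code.

# Minimal sketch only: not the full app.py.
import gradio as gr

def generate(message, system_prompt, max_new_tokens=1024, temperature=0.6,
             top_p=0.9, top_k=50, repetition_penalty=1.2):
    # Build a single-turn conversation, as in the updated generate().
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    conversation.append({"role": "user", "content": message})
    # Model generation and probe scoring omitted; placeholder outputs below.
    se_highlighted_text = "<span>semantic-uncertainty highlights</span>"
    acc_highlighted_text = "<span>accuracy highlights</span>"
    return se_highlighted_text, acc_highlighted_text

with gr.Blocks() as demo:
    message = gr.Textbox(label="Message")
    system_prompt = gr.Textbox(label="System prompt")
    generate_btn = gr.Button("Generate")
    with gr.Row():
        se_output = gr.HTML(label="Semantic Uncertainty Probe")
        acc_output = gr.HTML(label="Accuracy Probe")
    # Sampling sliders omitted; the keyword defaults above stand in for them.
    generate_btn.click(generate, inputs=[message, system_prompt],
                       outputs=[se_output, acc_output])

if __name__ == "__main__":
    demo.launch()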