Firoj112 committed on
Commit
e0dd43c
Β·
verified Β·
1 Parent(s): 0a194bd

Update GRADIO_UI.py

Browse files
Files changed (1) hide show
  1. GRADIO_UI.py +87 -97
GRADIO_UI.py CHANGED
@@ -4,48 +4,41 @@
4
  #
5
  import os
6
  import re
 
7
  from typing import Optional
 
8
 
9
  from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
10
  from smolagents.agents import ActionStep, MultiStepAgent
11
  from smolagents.memory import MemoryStep
12
  from smolagents.utils import _is_package_available
13
 
 
 
 
14
 
15
- def pull_messages_from_step(
16
- step_log: MemoryStep,
17
- ):
18
  """Extract ChatMessage objects from agent steps with proper nesting"""
19
  import gradio as gr
20
 
21
  if isinstance(step_log, ActionStep):
22
- # Output the step number
23
  step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
24
  yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")
25
 
26
- # First yield the thought/reasoning from the LLM
27
  if hasattr(step_log, "model_output") and step_log.model_output is not None:
28
- # Clean up the LLM output
29
  model_output = step_log.model_output.strip()
30
- # Remove any trailing <end_code> and extra backticks, handling multiple possible formats
31
  model_output = re.sub(r"```\s*<end_code>", "```", model_output)
32
  model_output = re.sub(r"<end_code>\s*```", "```", model_output)
33
  model_output = re.sub(r"```\s*\n\s*<end_code>", "```", model_output)
34
  model_output = model_output.strip()
35
  yield gr.ChatMessage(role="assistant", content=model_output)
36
 
37
- # For tool calls, create a parent message
38
  if hasattr(step_log, "tool_calls") and step_log.tool_calls is not None:
39
  first_tool_call = step_log.tool_calls[0]
40
  used_code = first_tool_call.name == "python_interpreter"
41
  parent_id = f"call_{len(step_log.tool_calls)}"
42
-
43
- # Tool call becomes the parent message with timing info
44
  args = first_tool_call.arguments
45
- if isinstance(args, dict):
46
- content = str(args.get("answer", str(args)))
47
- else:
48
- content = str(args).strip()
49
 
50
  if used_code:
51
  content = re.sub(r"```.*?\n", "", content)
@@ -57,48 +50,57 @@ def pull_messages_from_step(
57
  parent_message_tool = gr.ChatMessage(
58
  role="assistant",
59
  content=content,
60
- metadata={
61
- "title": f"πŸ› οΈ Used tool {first_tool_call.name}",
62
- "id": parent_id,
63
- "status": "pending",
64
- },
65
  )
66
  yield parent_message_tool
67
 
68
- # Nesting execution logs under the tool call if they exist
69
- if hasattr(step_log, "observations") and (
70
- step_log.observations is not None and step_log.observations.strip()
71
- ):
72
- log_content = step_log.observations.strip()
73
  if log_content:
74
- log_content = re.sub(r"^Execution logs:\s*", "", log_content)
75
- yield gr.ChatMessage(
76
- role="assistant",
77
- content=f"{log_content}",
78
- metadata={"title": "πŸ“ Execution Logs", "parent_id": parent_id, "status": "done"},
79
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
 
81
- # Nesting any errors under the tool call
82
  if hasattr(step_log, "error") and step_log.error is not None:
83
  yield gr.ChatMessage(
84
  role="assistant",
85
  content=str(step_log.error),
86
- metadata={"title": "πŸ’₯ Error", "parent_id": parent_id, "status": "done"},
87
  )
88
 
89
- # Update parent message metadata to done status without yielding a new message
90
  parent_message_tool.metadata["status"] = "done"
91
 
92
- # Handle standalone errors but not from tool calls
93
  elif hasattr(step_log, "error") and step_log.error is not None:
94
  yield gr.ChatMessage(role="assistant", content=str(step_log.error), metadata={"title": "πŸ’₯ Error"})
95
 
96
- # Calculate duration and token information
97
  step_footnote = f"{step_number}"
98
  if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"):
99
- token_str = (
100
- f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
101
- )
102
  step_footnote += token_str
103
  if hasattr(step_log, "duration"):
104
  step_duration = f" | Duration: {round(float(step_log.duration), 2)}" if step_log.duration else None
@@ -107,85 +109,79 @@ def pull_messages_from_step(
107
  yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
108
  yield gr.ChatMessage(role="assistant", content="-----")
109
 
110
- # Handle screenshot if present in observations
111
  if hasattr(step_log, "observations") and step_log.observations:
112
  for line in step_log.observations.split("\n"):
113
  if line.startswith("Screenshot saved at:"):
114
  screenshot_path = line.replace("Screenshot saved at: ", "").strip()
 
115
  yield gr.ChatMessage(
116
  role="assistant",
117
  content={"path": screenshot_path, "mime_type": "image/png"},
118
  metadata={"title": "πŸ“Έ Screenshot"}
119
  )
 
 
 
 
 
 
 
 
 
 
 
 
120
 
121
-
122
- def stream_to_gradio(
123
- agent,
124
- task: str,
125
- reset_agent_memory: bool = False,
126
- additional_args: Optional[dict] = None,
127
- ):
128
- """Runs an agent with the given task and streams the messages from the agent as gradio ChatMessages."""
129
  if not _is_package_available("gradio"):
130
- raise ModuleNotFoundError(
131
- "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
132
- )
133
  import gradio as gr
134
 
 
 
 
 
135
  total_input_tokens = 0
136
  total_output_tokens = 0
137
 
138
  for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
139
- if hasattr(agent.model, "last_input_token_count"):
140
- total_input_tokens += agent.model.last_input_token_count
141
- total_output_tokens += agent.model.last_output_token_count
142
- if isinstance(step_log, ActionStep):
143
- step_log.input_token_count = agent.model.last_input_token_count
144
- step_log.output_token_count = agent.model.last_output_token_count
145
-
146
- for message in pull_messages_from_step(
147
- step_log,
148
- ):
 
 
149
  yield message
150
 
151
  final_answer = step_log
152
  final_answer = handle_agent_output_types(final_answer)
153
 
154
  if isinstance(final_answer, AgentText):
155
- yield gr.ChatMessage(
156
- role="assistant",
157
- content=f"**Final answer:**\n{final_answer.to_string()}\n",
158
- )
159
  elif isinstance(final_answer, AgentImage):
160
- yield gr.ChatMessage(
161
- role="assistant",
162
- content={"path": final_answer.to_string(), "mime_type": "image/png"},
163
- )
164
  elif isinstance(final_answer, AgentAudio):
165
- yield gr.ChatMessage(
166
- role="assistant",
167
- content={"path": final_answer.to_string(), "mime_type": "audio/wav"},
168
- )
169
  else:
170
  yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
171
 
172
-
173
  class GradioUI:
174
- """A one-line interface to launch your agent in Gradio"""
175
-
176
  def __init__(self, agent: MultiStepAgent):
177
  if not _is_package_available("gradio"):
178
- raise ModuleNotFoundError(
179
- "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
180
- )
181
  self.agent = agent
182
 
183
- def interact_with_agent(self, prompt, messages):
184
  import gradio as gr
185
-
186
  messages.append(gr.ChatMessage(role="user", content=prompt))
187
  yield messages
188
- for msg in stream_to_gradio(self.agent, task=prompt, reset_agent_memory=False):
189
  messages.append(msg)
190
  yield messages
191
  yield messages
@@ -193,7 +189,6 @@ class GradioUI:
193
  def launch(self, **kwargs):
194
  import gradio as gr
195
 
196
- # Custom CSS for avatar alignment
197
  css = """
198
  .chatbot .avatar-container {
199
  display: flex !important;
@@ -211,26 +206,21 @@ class GradioUI:
211
  }
212
  """
213
 
214
- with gr.Blocks(fill_height=True) as demo:
215
  stored_messages = gr.State([])
 
 
 
 
216
  chatbot = gr.Chatbot(
217
- label="Web Navigation Agent",
218
- type="messages",
219
- avatar_images=(
220
- None,
221
- "./icon.png"
222
- ),
223
- scale=1,
224
- height=600,
225
  )
226
- text_input = gr.Textbox(lines=1, label="Enter URL and request (e.g., Please could you navigate to https://www.federalreserve.gov/releases/h15/ and extract interest rates)")
227
- text_input.submit(
228
- self.interact_with_agent,
229
- [text_input, stored_messages],
230
- [chatbot]
231
  )
 
232
 
233
  demo.launch(debug=True, **kwargs)
234
 
235
-
236
  __all__ = ["stream_to_gradio", "GradioUI"]
 
4
  #
5
  import os
6
  import re
7
+ import json
8
  from typing import Optional
9
+ import logging
10
 
11
  from smolagents.agent_types import AgentAudio, AgentImage, AgentText, handle_agent_output_types
12
  from smolagents.agents import ActionStep, MultiStepAgent
13
  from smolagents.memory import MemoryStep
14
  from smolagents.utils import _is_package_available
15
 
16
+ # Set up logging
17
+ logging.basicConfig(level=logging.DEBUG)
18
+ logger = logging.getLogger(__name__)
19
 
20
+ def pull_messages_from_step(step_log: MemoryStep):
 
 
21
  """Extract ChatMessage objects from agent steps with proper nesting"""
22
  import gradio as gr
23
 
24
  if isinstance(step_log, ActionStep):
 
25
  step_number = f"Step {step_log.step_number}" if step_log.step_number is not None else ""
26
  yield gr.ChatMessage(role="assistant", content=f"**{step_number}**")
27
 
 
28
  if hasattr(step_log, "model_output") and step_log.model_output is not None:
 
29
  model_output = step_log.model_output.strip()
 
30
  model_output = re.sub(r"```\s*<end_code>", "```", model_output)
31
  model_output = re.sub(r"<end_code>\s*```", "```", model_output)
32
  model_output = re.sub(r"```\s*\n\s*<end_code>", "```", model_output)
33
  model_output = model_output.strip()
34
  yield gr.ChatMessage(role="assistant", content=model_output)
35
 
 
36
  if hasattr(step_log, "tool_calls") and step_log.tool_calls is not None:
37
  first_tool_call = step_log.tool_calls[0]
38
  used_code = first_tool_call.name == "python_interpreter"
39
  parent_id = f"call_{len(step_log.tool_calls)}"
 
 
40
  args = first_tool_call.arguments
41
+ content = str(args.get("answer", str(args))) if isinstance(args, dict) else str(args).strip()
 
 
 
42
 
43
  if used_code:
44
  content = re.sub(r"```.*?\n", "", content)
 
50
  parent_message_tool = gr.ChatMessage(
51
  role="assistant",
52
  content=content,
53
+ metadata={"title": f"πŸ› οΈ Used tool {first_tool_call.name}", "id": parent_id, "status": "pending"}
 
 
 
 
54
  )
55
  yield parent_message_tool
56
 
57
+ if hasattr(step_log, "observations") and step_log.observations and step_log.observations.strip():
58
+ log_content = re.sub(r"^Execution logs:\s*", "", step_log.observations.strip())
 
 
 
59
  if log_content:
60
+ try:
61
+ # Try to parse as JSON for table data
62
+ data = json.loads(log_content)
63
+ if isinstance(data, list) and data and isinstance(data[0], list):
64
+ # Format as markdown table
65
+ headers = data[0]
66
+ rows = data[1:]
67
+ table_md = "| " + " | ".join(headers) + " |\n"
68
+ table_md += "| " + " | ".join(["---"] * len(headers)) + " |\n"
69
+ for row in rows:
70
+ table_md += "| " + " | ".join(str(cell) for cell in row) + " |\n"
71
+ yield gr.ChatMessage(
72
+ role="assistant",
73
+ content=table_md,
74
+ metadata={"title": "πŸ“Š Table Data", "parent_id": parent_id, "status": "done"}
75
+ )
76
+ else:
77
+ yield gr.ChatMessage(
78
+ role="assistant",
79
+ content=log_content,
80
+ metadata={"title": "πŸ“ Execution Logs", "parent_id": parent_id, "status": "done"}
81
+ )
82
+ except json.JSONDecodeError:
83
+ yield gr.ChatMessage(
84
+ role="assistant",
85
+ content=log_content,
86
+ metadata={"title": "πŸ“ Execution Logs", "parent_id": parent_id, "status": "done"}
87
+ )
88
 
 
89
  if hasattr(step_log, "error") and step_log.error is not None:
90
  yield gr.ChatMessage(
91
  role="assistant",
92
  content=str(step_log.error),
93
+ metadata={"title": "πŸ’₯ Error", "parent_id": parent_id, "status": "done"}
94
  )
95
 
 
96
  parent_message_tool.metadata["status"] = "done"
97
 
 
98
  elif hasattr(step_log, "error") and step_log.error is not None:
99
  yield gr.ChatMessage(role="assistant", content=str(step_log.error), metadata={"title": "πŸ’₯ Error"})
100
 
 
101
  step_footnote = f"{step_number}"
102
  if hasattr(step_log, "input_token_count") and hasattr(step_log, "output_token_count"):
103
+ token_str = f" | Input-tokens:{step_log.input_token_count:,} | Output-tokens:{step_log.output_token_count:,}"
 
 
104
  step_footnote += token_str
105
  if hasattr(step_log, "duration"):
106
  step_duration = f" | Duration: {round(float(step_log.duration), 2)}" if step_log.duration else None
 
109
  yield gr.ChatMessage(role="assistant", content=f"{step_footnote}")
110
  yield gr.ChatMessage(role="assistant", content="-----")
111
 
 
112
  if hasattr(step_log, "observations") and step_log.observations:
113
  for line in step_log.observations.split("\n"):
114
  if line.startswith("Screenshot saved at:"):
115
  screenshot_path = line.replace("Screenshot saved at: ", "").strip()
116
+ logger.debug(f"Yielding screenshot: {screenshot_path}")
117
  yield gr.ChatMessage(
118
  role="assistant",
119
  content={"path": screenshot_path, "mime_type": "image/png"},
120
  metadata={"title": "πŸ“Έ Screenshot"}
121
  )
122
+ elif line.endswith("_detected.png"):
123
+ yield gr.ChatMessage(
124
+ role="assistant",
125
+ content={"path": line.strip(), "mime_type": "image/png"},
126
+ metadata={"title": "πŸ–ΌοΈ Detected Elements"}
127
+ )
128
+ elif line and not line.startswith("Current url:"):
129
+ yield gr.ChatMessage(
130
+ role="assistant",
131
+ content=line,
132
+ metadata={"title": "πŸ“ Scraped Text"}
133
+ )
134
 
135
def stream_to_gradio(
    agent,
    task: str,
    api_key: Optional[str] = None,
    reset_agent_memory: bool = False,
    additional_args: Optional[dict] = None,
):
    """Run *agent* on *task* and stream its output as gradio ChatMessages.

    Args:
        agent: A smolagents agent exposing ``run(task, stream=True, ...)`` and
            a ``model`` attribute (token counts are read from it when present).
        task: The user request forwarded to the agent.
        api_key: Optional Gemini API key; when provided it is exported as the
            ``GOOGLE_API_KEY`` environment variable for this process.
        reset_agent_memory: Whether the agent should forget previous steps.
        additional_args: Extra arguments forwarded to ``agent.run``.

    Yields:
        ``gr.ChatMessage`` objects for each agent step, then the final answer.

    Raises:
        ModuleNotFoundError: If the gradio package is not installed.
    """
    if not _is_package_available("gradio"):
        raise ModuleNotFoundError(
            "Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`"
        )
    import gradio as gr

    if api_key:
        os.environ["GOOGLE_API_KEY"] = api_key
        logger.debug("Using user-provided GOOGLE_API_KEY")

    total_input_tokens = 0
    total_output_tokens = 0

    # Keep a handle on the last yielded step: agent.run's final item is the answer.
    step_log = None
    for step_log in agent.run(task, stream=True, reset=reset_agent_memory, additional_args=additional_args):
        # Not every model backend tracks token counts — guard the attribute reads
        # (the unguarded access raised AttributeError on such backends).
        input_tokens = getattr(agent.model, "last_input_token_count", None)
        output_tokens = getattr(agent.model, "last_output_token_count", None)
        # Lazy %-style args so formatting is skipped when DEBUG is off.
        logger.debug("Input tokens: %s, Output tokens: %s", input_tokens, output_tokens)
        if input_tokens is not None:
            total_input_tokens += input_tokens
        if output_tokens is not None:
            total_output_tokens += output_tokens
        if isinstance(step_log, ActionStep):
            step_log.input_token_count = input_tokens if input_tokens is not None else 0
            step_log.output_token_count = output_tokens if output_tokens is not None else 0

        for message in pull_messages_from_step(step_log):
            yield message

    if step_log is None:
        # The agent produced no steps; there is no final answer to emit.
        return

    final_answer = handle_agent_output_types(step_log)

    if isinstance(final_answer, AgentText):
        yield gr.ChatMessage(role="assistant", content=f"**Final answer:**\n{final_answer.to_string()}\n")
    elif isinstance(final_answer, AgentImage):
        yield gr.ChatMessage(role="assistant", content={"path": final_answer.to_string(), "mime_type": "image/png"})
    elif isinstance(final_answer, AgentAudio):
        yield gr.ChatMessage(role="assistant", content={"path": final_answer.to_string(), "mime_type": "audio/wav"})
    else:
        yield gr.ChatMessage(role="assistant", content=f"**Final answer:** {str(final_answer)}")
173
 
 
174
  class GradioUI:
 
 
175
  def __init__(self, agent: MultiStepAgent):
176
  if not _is_package_available("gradio"):
177
+ raise ModuleNotFoundError("Please install 'gradio' extra to use the GradioUI: `pip install 'smolagents[gradio]'`")
 
 
178
  self.agent = agent
179
 
180
+ def interact_with_agent(self, prompt, api_key, messages):
181
  import gradio as gr
 
182
  messages.append(gr.ChatMessage(role="user", content=prompt))
183
  yield messages
184
+ for msg in stream_to_gradio(self.agent, task=prompt, api_key=api_key, reset_agent_memory=False):
185
  messages.append(msg)
186
  yield messages
187
  yield messages
 
189
  def launch(self, **kwargs):
190
  import gradio as gr
191
 
 
192
  css = """
193
  .chatbot .avatar-container {
194
  display: flex !important;
 
206
  }
207
  """
208
 
209
+ with gr.Blocks(fill_height=True, css=css) as demo:
210
  stored_messages = gr.State([])
211
+ gr.Markdown("**Note**: Please provide your own Gemini API key below. The default key may run out of quota.")
212
+ api_key_input = gr.Textbox(
213
+ lines=1, label="Gemini API Key (optional)", placeholder="Enter your Gemini API key here", type="password"
214
+ )
215
  chatbot = gr.Chatbot(
216
+ label="Web Navigation Agent", type="messages",
217
+ avatar_images=(None, "./icon.png"), scale=1, height=600
 
 
 
 
 
 
218
  )
219
+ text_input = gr.Textbox(
220
+ lines=1, label="Enter URL and request (e.g., navigate to https://en.wikipedia.org/wiki/Nvidia, scrape the infobox table, fill a search form)"
 
 
 
221
  )
222
+ text_input.submit(self.interact_with_agent, [text_input, api_key_input, stored_messages], [chatbot])
223
 
224
  demo.launch(debug=True, **kwargs)
225
 
 
226
  __all__ = ["stream_to_gradio", "GradioUI"]