m-ric HF Staff committed on
Commit
cf80979
·
1 Parent(s): 6fdb42a

Add end of step screenshot in chat

Browse files
Files changed (2) hide show
  1. app.py +8 -1
  2. e2bqwen.py +5 -2
app.py CHANGED
@@ -522,11 +522,18 @@ class EnrichedGradioUI(GradioUI):
522
  yield stored_messages
523
 
524
  for msg in stream_to_gradio(session_state["agent"], task=full_task, reset_agent_memory=False):
 
 
 
 
 
525
  stored_messages.append(msg)
526
  yield stored_messages
527
 
528
  yield stored_messages
529
- # save_final_status(data_dir, "completed", details = str(session_state["agent"].memory.get_succinct_steps()))
 
 
530
  # except Exception as e:
531
  # error_message=f"Error in interaction: {str(e)}"
532
  # stored_messages.append(gr.ChatMessage(role="assistant", content=error_message))
 
522
  yield stored_messages
523
 
524
  for msg in stream_to_gradio(session_state["agent"], task=full_task, reset_agent_memory=False):
525
+ if hasattr(session_state["agent"], "last_screenshot") and msg.content == "-----": # Append the last screenshot before the end of step
526
+ stored_messages.append(gr.ChatMessage(
527
+ role="assistant",
528
+ content={"path": session_state["agent"].last_screenshot.to_string(), "mime_type": "image/png"},
529
+ ))
530
  stored_messages.append(msg)
531
  yield stored_messages
532
 
533
  yield stored_messages
534
+
535
+ # TODO: uncomment below after testing
536
+ # save_final_status(data_dir, "completed", details = str(session_state["agent"].memory.get_succinct_steps()))
537
  # except Exception as e:
538
  # error_message=f"Error in interaction: {str(e)}"
539
  # stored_messages.append(gr.ChatMessage(role="assistant", content=error_message))
e2bqwen.py CHANGED
@@ -18,7 +18,9 @@ from smolagents import CodeAgent, tool, HfApiModel
18
  from smolagents.memory import ActionStep
19
  from smolagents.models import ChatMessage, MessageRole, Model
20
  from smolagents.monitoring import LogLevel
21
-
 
 
22
  E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
23
  On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
24
  {%- for tool in tools.values() %}
@@ -321,6 +323,7 @@ class E2BVisionAgent(CodeAgent):
321
  # Create a filename with step number
322
  screenshot_path = os.path.join(self.data_dir, f"step_{current_step:03d}.png")
323
  image.save(screenshot_path)
 
324
  print(f"Saved screenshot for step {current_step} to {screenshot_path}")
325
 
326
  for (
@@ -333,7 +336,7 @@ class E2BVisionAgent(CodeAgent):
333
  previous_memory_step.observations_images = None
334
 
335
  # Add to the current memory step
336
- memory_step.observations_images = [image.copy()] # This takes the original image directly.
337
 
338
  # memory_step.observations_images = [screenshot_path] # IF YOU USE THIS INSTEAD OF ABOVE, LAUNCHING A SECOND TASK BREAKS
339
 
 
18
  from smolagents.memory import ActionStep
19
  from smolagents.models import ChatMessage, MessageRole, Model
20
  from smolagents.monitoring import LogLevel
21
+ from smolagents.agent_types import AgentImage
22
+
23
+
24
  E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
25
  On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
26
  {%- for tool in tools.values() %}
 
323
  # Create a filename with step number
324
  screenshot_path = os.path.join(self.data_dir, f"step_{current_step:03d}.png")
325
  image.save(screenshot_path)
326
+ self.last_screenshot = AgentImage(screenshot_path)
327
  print(f"Saved screenshot for step {current_step} to {screenshot_path}")
328
 
329
  for (
 
336
  previous_memory_step.observations_images = None
337
 
338
  # Add to the current memory step
339
+ memory_step.observations_images = [image.copy()]
340
 
341
  # memory_step.observations_images = [screenshot_path] # IF YOU USE THIS INSTEAD OF ABOVE, LAUNCHING A SECOND TASK BREAKS
342