Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Add end of step screenshot in chat
Browse files
- app.py +8 -1
- e2bqwen.py +5 -2
app.py
CHANGED
@@ -522,11 +522,18 @@ class EnrichedGradioUI(GradioUI):
|
|
522 |
yield stored_messages
|
523 |
|
524 |
for msg in stream_to_gradio(session_state["agent"], task=full_task, reset_agent_memory=False):
|
|
|
|
|
|
|
|
|
|
|
525 |
stored_messages.append(msg)
|
526 |
yield stored_messages
|
527 |
|
528 |
yield stored_messages
|
529 |
-
|
|
|
|
|
530 |
# except Exception as e:
|
531 |
# error_message=f"Error in interaction: {str(e)}"
|
532 |
# stored_messages.append(gr.ChatMessage(role="assistant", content=error_message))
|
|
|
522 |
yield stored_messages
|
523 |
|
524 |
for msg in stream_to_gradio(session_state["agent"], task=full_task, reset_agent_memory=False):
|
525 |
+
if hasattr(session_state["agent"], "last_screenshot") and msg.content == "-----": # Append the last screenshot before the end of step
|
526 |
+
stored_messages.append(gr.ChatMessage(
|
527 |
+
role="assistant",
|
528 |
+
content={"path": session_state["agent"].last_screenshot.to_string(), "mime_type": "image/png"},
|
529 |
+
))
|
530 |
stored_messages.append(msg)
|
531 |
yield stored_messages
|
532 |
|
533 |
yield stored_messages
|
534 |
+
|
535 |
+
# TODO: uncomment below after testing
|
536 |
+
# save_final_status(data_dir, "completed", details = str(session_state["agent"].memory.get_succinct_steps()))
|
537 |
# except Exception as e:
|
538 |
# error_message=f"Error in interaction: {str(e)}"
|
539 |
# stored_messages.append(gr.ChatMessage(role="assistant", content=error_message))
|
e2bqwen.py
CHANGED
@@ -18,7 +18,9 @@ from smolagents import CodeAgent, tool, HfApiModel
|
|
18 |
from smolagents.memory import ActionStep
|
19 |
from smolagents.models import ChatMessage, MessageRole, Model
|
20 |
from smolagents.monitoring import LogLevel
|
21 |
-
|
|
|
|
|
22 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
23 |
On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
|
24 |
{%- for tool in tools.values() %}
|
@@ -321,6 +323,7 @@ class E2BVisionAgent(CodeAgent):
|
|
321 |
# Create a filename with step number
|
322 |
screenshot_path = os.path.join(self.data_dir, f"step_{current_step:03d}.png")
|
323 |
image.save(screenshot_path)
|
|
|
324 |
print(f"Saved screenshot for step {current_step} to {screenshot_path}")
|
325 |
|
326 |
for (
|
@@ -333,7 +336,7 @@ class E2BVisionAgent(CodeAgent):
|
|
333 |
previous_memory_step.observations_images = None
|
334 |
|
335 |
# Add to the current memory step
|
336 |
-
memory_step.observations_images = [image.copy()]
|
337 |
|
338 |
# memory_step.observations_images = [screenshot_path] # IF YOU USE THIS INSTEAD OF ABOVE, LAUNCHING A SECOND TASK BREAKS
|
339 |
|
|
|
18 |
from smolagents.memory import ActionStep
|
19 |
from smolagents.models import ChatMessage, MessageRole, Model
|
20 |
from smolagents.monitoring import LogLevel
|
21 |
+
from smolagents.agent_types import AgentImage
|
22 |
+
|
23 |
+
|
24 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
25 |
On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
|
26 |
{%- for tool in tools.values() %}
|
|
|
323 |
# Create a filename with step number
|
324 |
screenshot_path = os.path.join(self.data_dir, f"step_{current_step:03d}.png")
|
325 |
image.save(screenshot_path)
|
326 |
+
self.last_screenshot = AgentImage(screenshot_path)
|
327 |
print(f"Saved screenshot for step {current_step} to {screenshot_path}")
|
328 |
|
329 |
for (
|
|
|
336 |
previous_memory_step.observations_images = None
|
337 |
|
338 |
# Add to the current memory step
|
339 |
+
memory_step.observations_images = [image.copy()]
|
340 |
|
341 |
# memory_step.observations_images = [screenshot_path] # IF YOU USE THIS INSTEAD OF ABOVE, LAUNCHING A SECOND TASK BREAKS
|
342 |
|