Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Switch to agent logging
Browse files- app.py +78 -52
- e2bqwen.py +14 -38
app.py
CHANGED
@@ -35,25 +35,17 @@ model = QwenVLAPIModel(
|
|
35 |
|
36 |
|
37 |
custom_css = """
|
38 |
-
:root {
|
39 |
-
--body-background-fill: black!important;
|
40 |
-
--background-fill-secondary: #fad391!important;
|
41 |
-
--body-text-color: #f59e0b!important;
|
42 |
-
--block-text-color: #f59e0b!important;
|
43 |
-
}
|
44 |
.sandbox-container {
|
45 |
position: relative;
|
46 |
width: 910px;
|
47 |
overflow: hidden;
|
48 |
margin: auto;
|
49 |
}
|
50 |
-
.
|
51 |
height: 800px;
|
52 |
}
|
53 |
-
.minimal {
|
54 |
-
height: 700px;
|
55 |
-
}
|
56 |
.sandbox-frame {
|
|
|
57 |
position: absolute;
|
58 |
top: 0;
|
59 |
left: 0;
|
@@ -61,9 +53,6 @@ custom_css = """
|
|
61 |
height: 800px;
|
62 |
pointer-events:none;
|
63 |
}
|
64 |
-
.minimal .sandbox-frame {
|
65 |
-
display: none;
|
66 |
-
}
|
67 |
|
68 |
.sandbox-iframe, .bsod-image {
|
69 |
position: absolute;
|
@@ -72,19 +61,6 @@ custom_css = """
|
|
72 |
border: 4px solid #444444;
|
73 |
transform-origin: 0 0;
|
74 |
}
|
75 |
-
.cyberpunk .sandbox-iframe, .bsod-image {
|
76 |
-
/* top: 73px; */
|
77 |
-
top: 99px;
|
78 |
-
/* left: 74px; */
|
79 |
-
left: 110px;
|
80 |
-
}
|
81 |
-
.cyberpunk .sandbox-iframe {
|
82 |
-
transform: scale(0.535);
|
83 |
-
/* transform: scale(0.59); */
|
84 |
-
}
|
85 |
-
.minimal .sandbox-iframe {
|
86 |
-
transform: scale(0.65);
|
87 |
-
}
|
88 |
|
89 |
/* Colored label for task textbox */
|
90 |
.primary-color-label label span {
|
@@ -100,11 +76,7 @@ custom_css = """
|
|
100 |
flex-align:center;
|
101 |
z-index: 100;
|
102 |
}
|
103 |
-
|
104 |
-
position: absolute;
|
105 |
-
bottom: 86px;
|
106 |
-
left: 355px;
|
107 |
-
}
|
108 |
.status-indicator {
|
109 |
width: 15px;
|
110 |
height: 15px;
|
@@ -117,9 +89,7 @@ custom_css = """
|
|
117 |
padding: 0 10px;
|
118 |
text-shadow: none;
|
119 |
}
|
120 |
-
|
121 |
-
color: #fed244;
|
122 |
-
}
|
123 |
.status-interactive {
|
124 |
background-color: #2ecc71;
|
125 |
animation: blink 2s infinite;
|
@@ -186,7 +156,7 @@ sandbox_html_template = """
|
|
186 |
<style>
|
187 |
@import url('https://fonts.googleapis.com/css2?family=Oxanium:wght@200..800&display=swap');
|
188 |
</style>
|
189 |
-
<div class="sandbox-container
|
190 |
<div class="status-bar">
|
191 |
<div class="status-indicator {status_class}"></div>
|
192 |
<div class="status-text">{status_text}</div>
|
@@ -202,8 +172,9 @@ sandbox_html_template = """
|
|
202 |
</div>
|
203 |
""".replace("<<WIDTH>>", str(WIDTH+15)).replace("<<HEIGHT>>", str(HEIGHT+10))
|
204 |
|
205 |
-
custom_js = """
|
206 |
-
|
|
|
207 |
// Function to check if sandbox is timing out
|
208 |
const checkSandboxTimeout = function() {
|
209 |
const timeElement = document.getElementById('sandbox-creation-time');
|
@@ -427,7 +398,7 @@ def get_or_create_sandbox(session_hash):
|
|
427 |
|
428 |
return desktop
|
429 |
|
430 |
-
def update_html(interactive_mode: bool,
|
431 |
session_hash = request.session_hash
|
432 |
desktop = get_or_create_sandbox(session_hash)
|
433 |
auth_key = desktop.stream.get_auth_key()
|
@@ -443,7 +414,6 @@ def update_html(interactive_mode: bool, theme_checkbox: bool, request: gr.Reques
|
|
443 |
creation_time = SANDBOX_METADATA[session_hash]['created_at'] if session_hash in SANDBOX_METADATA else time.time()
|
444 |
|
445 |
sandbox_html_content = sandbox_html_template.format(
|
446 |
-
theme="cyberpunk" if theme_checkbox else "minimal",
|
447 |
stream_url=stream_url,
|
448 |
status_class=status_class,
|
449 |
status_text=status_text,
|
@@ -483,7 +453,7 @@ def initialize_session(interactive_mode, request: gr.Request):
|
|
483 |
with open(log_path, 'w') as f:
|
484 |
f.write(f"Ready to go...\n")
|
485 |
# Return HTML and session hash
|
486 |
-
return update_html(interactive_mode,
|
487 |
|
488 |
|
489 |
# Function to read log content that gets the path from session hash
|
@@ -564,7 +534,7 @@ class EnrichedGradioUI(GradioUI):
|
|
564 |
finally:
|
565 |
upload_to_hf_and_remove(data_dir)
|
566 |
|
567 |
-
theme = gr.themes.Default(font=[
|
568 |
|
569 |
# Create a Gradio app with Blocks
|
570 |
with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as demo:
|
@@ -576,7 +546,6 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as de
|
|
576 |
with gr.Row():
|
577 |
sandbox_html = gr.HTML(
|
578 |
value=sandbox_html_template.format(
|
579 |
-
theme="cyberpunk",
|
580 |
stream_url="",
|
581 |
status_class="status-interactive",
|
582 |
status_text="Interactive"
|
@@ -613,7 +582,64 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as de
|
|
613 |
)
|
614 |
|
615 |
update_btn = gr.Button("Let's go!", variant="primary")
|
616 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
617 |
|
618 |
chatbot_display = gr.Chatbot(
|
619 |
label="Agent's execution logs",
|
@@ -650,35 +676,35 @@ with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as de
|
|
650 |
# Function to set view-only mode
|
651 |
def clear_and_set_view_only(task_input, request: gr.Request):
|
652 |
# First clear the results, then set view-only mode
|
653 |
-
return "", update_html(False,
|
654 |
|
655 |
def set_interactive(request: gr.Request):
|
656 |
-
return update_html(True,
|
657 |
|
658 |
is_interactive = gr.Checkbox(value=True, visible=False)
|
659 |
|
660 |
# Chain the events
|
661 |
view_only_event = update_btn.click(
|
662 |
fn=clear_and_set_view_only,
|
663 |
-
inputs=[task_input
|
664 |
outputs=[results_output, sandbox_html, results_container]
|
665 |
)
|
666 |
view_only_event.then(agent_ui.interact_with_agent, [task_input, stored_messages, session_state, session_hash_state], [chatbot_display]).then(
|
667 |
fn=set_interactive,
|
668 |
-
inputs=[
|
669 |
outputs=sandbox_html
|
670 |
)
|
671 |
|
672 |
-
theme_checkbox.change(
|
673 |
-
fn=update_html,
|
674 |
-
inputs=[is_interactive, theme_checkbox],
|
675 |
-
outputs=[sandbox_html]
|
676 |
-
)
|
677 |
|
678 |
demo.load(
|
679 |
fn=initialize_session,
|
680 |
inputs=[is_interactive],
|
681 |
outputs=[sandbox_html, session_hash_state],
|
|
|
|
|
|
|
|
|
|
|
682 |
)
|
683 |
|
684 |
# Launch the app
|
|
|
35 |
|
36 |
|
37 |
custom_css = """
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
.sandbox-container {
|
39 |
position: relative;
|
40 |
width: 910px;
|
41 |
overflow: hidden;
|
42 |
margin: auto;
|
43 |
}
|
44 |
+
.sandbox-container {
|
45 |
height: 800px;
|
46 |
}
|
|
|
|
|
|
|
47 |
.sandbox-frame {
|
48 |
+
display: none;
|
49 |
position: absolute;
|
50 |
top: 0;
|
51 |
left: 0;
|
|
|
53 |
height: 800px;
|
54 |
pointer-events:none;
|
55 |
}
|
|
|
|
|
|
|
56 |
|
57 |
.sandbox-iframe, .bsod-image {
|
58 |
position: absolute;
|
|
|
61 |
border: 4px solid #444444;
|
62 |
transform-origin: 0 0;
|
63 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
|
65 |
/* Colored label for task textbox */
|
66 |
.primary-color-label label span {
|
|
|
76 |
flex-align:center;
|
77 |
z-index: 100;
|
78 |
}
|
79 |
+
|
|
|
|
|
|
|
|
|
80 |
.status-indicator {
|
81 |
width: 15px;
|
82 |
height: 15px;
|
|
|
89 |
padding: 0 10px;
|
90 |
text-shadow: none;
|
91 |
}
|
92 |
+
|
|
|
|
|
93 |
.status-interactive {
|
94 |
background-color: #2ecc71;
|
95 |
animation: blink 2s infinite;
|
|
|
156 |
<style>
|
157 |
@import url('https://fonts.googleapis.com/css2?family=Oxanium:wght@200..800&display=swap');
|
158 |
</style>
|
159 |
+
<div class="sandbox-container">
|
160 |
<div class="status-bar">
|
161 |
<div class="status-indicator {status_class}"></div>
|
162 |
<div class="status-text">{status_text}</div>
|
|
|
172 |
</div>
|
173 |
""".replace("<<WIDTH>>", str(WIDTH+15)).replace("<<HEIGHT>>", str(HEIGHT+10))
|
174 |
|
175 |
+
custom_js = """function() {
|
176 |
+
document.body.classList.toggle('dark');
|
177 |
+
|
178 |
// Function to check if sandbox is timing out
|
179 |
const checkSandboxTimeout = function() {
|
180 |
const timeElement = document.getElementById('sandbox-creation-time');
|
|
|
398 |
|
399 |
return desktop
|
400 |
|
401 |
+
def update_html(interactive_mode: bool, request: gr.Request):
|
402 |
session_hash = request.session_hash
|
403 |
desktop = get_or_create_sandbox(session_hash)
|
404 |
auth_key = desktop.stream.get_auth_key()
|
|
|
414 |
creation_time = SANDBOX_METADATA[session_hash]['created_at'] if session_hash in SANDBOX_METADATA else time.time()
|
415 |
|
416 |
sandbox_html_content = sandbox_html_template.format(
|
|
|
417 |
stream_url=stream_url,
|
418 |
status_class=status_class,
|
419 |
status_text=status_text,
|
|
|
453 |
with open(log_path, 'w') as f:
|
454 |
f.write(f"Ready to go...\n")
|
455 |
# Return HTML and session hash
|
456 |
+
return update_html(interactive_mode, request), session_hash
|
457 |
|
458 |
|
459 |
# Function to read log content that gets the path from session hash
|
|
|
534 |
finally:
|
535 |
upload_to_hf_and_remove(data_dir)
|
536 |
|
537 |
+
theme = gr.themes.Default(font=["sans-serif"], primary_hue="amber", secondary_hue="blue")
|
538 |
|
539 |
# Create a Gradio app with Blocks
|
540 |
with gr.Blocks(theme=theme, css=custom_css, js=custom_js, fill_width=True) as demo:
|
|
|
546 |
with gr.Row():
|
547 |
sandbox_html = gr.HTML(
|
548 |
value=sandbox_html_template.format(
|
|
|
549 |
stream_url="",
|
550 |
status_class="status-interactive",
|
551 |
status_text="Interactive"
|
|
|
582 |
)
|
583 |
|
584 |
update_btn = gr.Button("Let's go!", variant="primary")
|
585 |
+
|
586 |
+
cyberpunk_toggle = gr.Checkbox(label="Go Cyberpunk!", value=False)
|
587 |
+
|
588 |
+
def apply_theme(cyberpunk_mode: bool):
|
589 |
+
if cyberpunk_mode:
|
590 |
+
return """
|
591 |
+
<style>
|
592 |
+
:root {
|
593 |
+
--body-background-fill: black!important;
|
594 |
+
--background-fill-secondary: #fad391!important;
|
595 |
+
--body-text-color: #f59e0b!important;
|
596 |
+
--block-text-color: #f59e0b!important;
|
597 |
+
--font: Oxanium;
|
598 |
+
}
|
599 |
+
.sandbox-frame {
|
600 |
+
display: block!important;
|
601 |
+
}
|
602 |
+
|
603 |
+
.sandbox-iframe, .bsod-image {
|
604 |
+
/* top: 73px; */
|
605 |
+
top: 99px;
|
606 |
+
/* left: 74px; */
|
607 |
+
left: 110px;
|
608 |
+
}
|
609 |
+
.sandbox-iframe {
|
610 |
+
transform: scale(0.535);
|
611 |
+
/* transform: scale(0.59); */
|
612 |
+
}
|
613 |
+
|
614 |
+
.status-bar {
|
615 |
+
position: absolute;
|
616 |
+
bottom: 86px;
|
617 |
+
left: 355px;
|
618 |
+
}
|
619 |
+
.status-text {
|
620 |
+
color: #fed244;
|
621 |
+
}
|
622 |
+
</style>
|
623 |
+
"""
|
624 |
+
else:
|
625 |
+
return """
|
626 |
+
<style>
|
627 |
+
.sandbox-container {
|
628 |
+
height: 700px!important;
|
629 |
+
}
|
630 |
+
.sandbox-iframe {
|
631 |
+
transform: scale(0.65);
|
632 |
+
}
|
633 |
+
</style>
|
634 |
+
"""
|
635 |
+
|
636 |
+
# Hidden HTML element to inject CSS dynamically
|
637 |
+
theme_styles = gr.HTML(apply_theme(False), visible=False)
|
638 |
+
cyberpunk_toggle.change(
|
639 |
+
fn=apply_theme,
|
640 |
+
inputs=[cyberpunk_toggle],
|
641 |
+
outputs=[theme_styles]
|
642 |
+
)
|
643 |
|
644 |
chatbot_display = gr.Chatbot(
|
645 |
label="Agent's execution logs",
|
|
|
676 |
# Function to set view-only mode
|
677 |
def clear_and_set_view_only(task_input, request: gr.Request):
|
678 |
# First clear the results, then set view-only mode
|
679 |
+
return "", update_html(False, request), gr.update(visible=False)
|
680 |
|
681 |
def set_interactive(request: gr.Request):
|
682 |
+
return update_html(True, request)
|
683 |
|
684 |
is_interactive = gr.Checkbox(value=True, visible=False)
|
685 |
|
686 |
# Chain the events
|
687 |
view_only_event = update_btn.click(
|
688 |
fn=clear_and_set_view_only,
|
689 |
+
inputs=[task_input],
|
690 |
outputs=[results_output, sandbox_html, results_container]
|
691 |
)
|
692 |
view_only_event.then(agent_ui.interact_with_agent, [task_input, stored_messages, session_state, session_hash_state], [chatbot_display]).then(
|
693 |
fn=set_interactive,
|
694 |
+
inputs=[],
|
695 |
outputs=sandbox_html
|
696 |
)
|
697 |
|
|
|
|
|
|
|
|
|
|
|
698 |
|
699 |
demo.load(
|
700 |
fn=initialize_session,
|
701 |
inputs=[is_interactive],
|
702 |
outputs=[sandbox_html, session_hash_state],
|
703 |
+
js="""
|
704 |
+
() => {
|
705 |
+
document.body.classList.toggle('dark');
|
706 |
+
}
|
707 |
+
""",
|
708 |
)
|
709 |
|
710 |
# Launch the app
|
e2bqwen.py
CHANGED
@@ -18,26 +18,6 @@ from smolagents import CodeAgent, tool, HfApiModel
|
|
18 |
from smolagents.memory import ActionStep
|
19 |
from smolagents.models import ChatMessage, MessageRole, Model
|
20 |
from smolagents.monitoring import LogLevel
|
21 |
-
|
22 |
-
def write_to_console_log(log_file_path, message):
|
23 |
-
"""
|
24 |
-
Appends a message to the specified log file with a newline character.
|
25 |
-
|
26 |
-
Parameters:
|
27 |
-
log_file_path (str): Path to the log file
|
28 |
-
message (str): Message to append to the log file
|
29 |
-
"""
|
30 |
-
if log_file_path is None:
|
31 |
-
return False
|
32 |
-
try:
|
33 |
-
# Open the file in append mode
|
34 |
-
with open(log_file_path, 'a') as log_file:
|
35 |
-
# Write the message followed by a newline
|
36 |
-
log_file.write(f"{message}\n")
|
37 |
-
return True
|
38 |
-
except Exception as e:
|
39 |
-
print(f"Error writing to log file: {str(e)}")
|
40 |
-
return False
|
41 |
|
42 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
43 |
On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
|
@@ -125,13 +105,10 @@ class E2BVisionAgent(CodeAgent):
|
|
125 |
self.desktop = desktop
|
126 |
self.data_dir = data_dir
|
127 |
self.log_path = log_file
|
128 |
-
write_to_console_log(self.log_path, "Booting agent...")
|
129 |
self.planning_interval = planning_interval
|
130 |
# Initialize Desktop
|
131 |
self.width, self.height = self.desktop.get_screen_size()
|
132 |
print(f"Screen size: {self.width}x{self.height}")
|
133 |
-
write_to_console_log(self.log_path, f"Desktop resolution detected: {self.width}x{self.height}")
|
134 |
-
|
135 |
|
136 |
# Set up temp directory
|
137 |
os.makedirs(self.data_dir, exist_ok=True)
|
@@ -157,9 +134,9 @@ class E2BVisionAgent(CodeAgent):
|
|
157 |
|
158 |
# Add default tools
|
159 |
self._setup_desktop_tools()
|
160 |
-
|
161 |
self.step_callbacks.append(self.take_snapshot_callback)
|
162 |
-
|
163 |
|
164 |
def _setup_desktop_tools(self):
|
165 |
"""Register all desktop tools"""
|
@@ -173,7 +150,7 @@ class E2BVisionAgent(CodeAgent):
|
|
173 |
"""
|
174 |
self.desktop.move_mouse(x, y)
|
175 |
self.desktop.left_click()
|
176 |
-
|
177 |
return f"Clicked at coordinates ({x}, {y})"
|
178 |
|
179 |
@tool
|
@@ -186,7 +163,7 @@ class E2BVisionAgent(CodeAgent):
|
|
186 |
"""
|
187 |
self.desktop.move_mouse(x, y)
|
188 |
self.desktop.right_click()
|
189 |
-
|
190 |
return f"Right-clicked at coordinates ({x}, {y})"
|
191 |
|
192 |
@tool
|
@@ -199,7 +176,7 @@ class E2BVisionAgent(CodeAgent):
|
|
199 |
"""
|
200 |
self.desktop.move_mouse(x, y)
|
201 |
self.desktop.double_click()
|
202 |
-
|
203 |
return f"Double-clicked at coordinates ({x}, {y})"
|
204 |
|
205 |
@tool
|
@@ -211,7 +188,7 @@ class E2BVisionAgent(CodeAgent):
|
|
211 |
y: The y coordinate (vertical position)
|
212 |
"""
|
213 |
self.desktop.move_mouse(x, y)
|
214 |
-
|
215 |
return f"Moved mouse to coordinates ({x}, {y})"
|
216 |
|
217 |
@tool
|
@@ -223,7 +200,7 @@ class E2BVisionAgent(CodeAgent):
|
|
223 |
delay_in_ms: Delay between keystrokes in milliseconds
|
224 |
"""
|
225 |
self.desktop.write(text, delay_in_ms=delay_in_ms)
|
226 |
-
|
227 |
return f"Typed text: '{text}'"
|
228 |
|
229 |
@tool
|
@@ -236,7 +213,7 @@ class E2BVisionAgent(CodeAgent):
|
|
236 |
if key == "enter":
|
237 |
key = "Return"
|
238 |
self.desktop.press(key)
|
239 |
-
|
240 |
return f"Pressed key: {key}"
|
241 |
|
242 |
@tool
|
@@ -246,7 +223,7 @@ class E2BVisionAgent(CodeAgent):
|
|
246 |
Args:
|
247 |
"""
|
248 |
self.desktop.press(["alt", "left"])
|
249 |
-
|
250 |
return "Went back one page"
|
251 |
|
252 |
@tool
|
@@ -261,7 +238,7 @@ class E2BVisionAgent(CodeAgent):
|
|
261 |
"""
|
262 |
self.desktop.drag([x1, y1], [x2, y2])
|
263 |
message = f"Dragged and dropped from [{x1}, {y1}] to [{x2}, {y2}]"
|
264 |
-
|
265 |
return message
|
266 |
|
267 |
@tool
|
@@ -273,7 +250,7 @@ class E2BVisionAgent(CodeAgent):
|
|
273 |
amount: The amount to scroll. A good amount is 1 or 2.
|
274 |
"""
|
275 |
self.desktop.scroll(direction=direction, amount=amount)
|
276 |
-
|
277 |
return f"Scrolled {direction} by {amount}"
|
278 |
|
279 |
@tool
|
@@ -284,7 +261,7 @@ class E2BVisionAgent(CodeAgent):
|
|
284 |
seconds: Number of seconds to wait
|
285 |
"""
|
286 |
time.sleep(seconds)
|
287 |
-
|
288 |
return f"Waited for {seconds} seconds"
|
289 |
|
290 |
@tool
|
@@ -301,7 +278,7 @@ class E2BVisionAgent(CodeAgent):
|
|
301 |
self.desktop.open(url)
|
302 |
# Give it time to load
|
303 |
time.sleep(2)
|
304 |
-
|
305 |
return f"Opened URL: {url}"
|
306 |
|
307 |
|
@@ -330,8 +307,7 @@ class E2BVisionAgent(CodeAgent):
|
|
330 |
|
331 |
def take_snapshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|
332 |
"""Callback that takes a screenshot + memory snapshot after a step completes"""
|
333 |
-
|
334 |
-
write_to_console_log(self.log_path, "Analyzing screen content...")
|
335 |
|
336 |
current_step = memory_step.step_number
|
337 |
print(f"Taking screenshot for step {current_step}")
|
|
|
18 |
from smolagents.memory import ActionStep
|
19 |
from smolagents.models import ChatMessage, MessageRole, Model
|
20 |
from smolagents.monitoring import LogLevel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
|
22 |
E2B_SYSTEM_PROMPT_TEMPLATE = """You are a desktop automation assistant that can control a remote desktop environment.
|
23 |
On top of performing computations in the Python code snippets that you create, you only have access to these tools to interact with the desktop, no additional ones:
|
|
|
105 |
self.desktop = desktop
|
106 |
self.data_dir = data_dir
|
107 |
self.log_path = log_file
|
|
|
108 |
self.planning_interval = planning_interval
|
109 |
# Initialize Desktop
|
110 |
self.width, self.height = self.desktop.get_screen_size()
|
111 |
print(f"Screen size: {self.width}x{self.height}")
|
|
|
|
|
112 |
|
113 |
# Set up temp directory
|
114 |
os.makedirs(self.data_dir, exist_ok=True)
|
|
|
134 |
|
135 |
# Add default tools
|
136 |
self._setup_desktop_tools()
|
137 |
+
self.logger.log("Setting up agent tools...")
|
138 |
self.step_callbacks.append(self.take_snapshot_callback)
|
139 |
+
self.logger.log("Studying an action plan... that will take a bit.")
|
140 |
|
141 |
def _setup_desktop_tools(self):
|
142 |
"""Register all desktop tools"""
|
|
|
150 |
"""
|
151 |
self.desktop.move_mouse(x, y)
|
152 |
self.desktop.left_click()
|
153 |
+
self.logger.log(self.log_path, f"Clicked at coordinates ({x}, {y})")
|
154 |
return f"Clicked at coordinates ({x}, {y})"
|
155 |
|
156 |
@tool
|
|
|
163 |
"""
|
164 |
self.desktop.move_mouse(x, y)
|
165 |
self.desktop.right_click()
|
166 |
+
self.logger.log(self.log_path, f"Right-clicked at coordinates ({x}, {y})")
|
167 |
return f"Right-clicked at coordinates ({x}, {y})"
|
168 |
|
169 |
@tool
|
|
|
176 |
"""
|
177 |
self.desktop.move_mouse(x, y)
|
178 |
self.desktop.double_click()
|
179 |
+
self.logger.log(self.log_path, f"Double-clicked at coordinates ({x}, {y})")
|
180 |
return f"Double-clicked at coordinates ({x}, {y})"
|
181 |
|
182 |
@tool
|
|
|
188 |
y: The y coordinate (vertical position)
|
189 |
"""
|
190 |
self.desktop.move_mouse(x, y)
|
191 |
+
self.logger.log(self.log_path, f"Moved mouse to coordinates ({x}, {y})")
|
192 |
return f"Moved mouse to coordinates ({x}, {y})"
|
193 |
|
194 |
@tool
|
|
|
200 |
delay_in_ms: Delay between keystrokes in milliseconds
|
201 |
"""
|
202 |
self.desktop.write(text, delay_in_ms=delay_in_ms)
|
203 |
+
self.logger.log(self.log_path, f"Typed text: '{text}'")
|
204 |
return f"Typed text: '{text}'"
|
205 |
|
206 |
@tool
|
|
|
213 |
if key == "enter":
|
214 |
key = "Return"
|
215 |
self.desktop.press(key)
|
216 |
+
self.logger.log(self.log_path, f"Pressed key: {key}")
|
217 |
return f"Pressed key: {key}"
|
218 |
|
219 |
@tool
|
|
|
223 |
Args:
|
224 |
"""
|
225 |
self.desktop.press(["alt", "left"])
|
226 |
+
self.logger.log(self.log_path, "Went back one page")
|
227 |
return "Went back one page"
|
228 |
|
229 |
@tool
|
|
|
238 |
"""
|
239 |
self.desktop.drag([x1, y1], [x2, y2])
|
240 |
message = f"Dragged and dropped from [{x1}, {y1}] to [{x2}, {y2}]"
|
241 |
+
self.logger.log(self.log_path, message)
|
242 |
return message
|
243 |
|
244 |
@tool
|
|
|
250 |
amount: The amount to scroll. A good amount is 1 or 2.
|
251 |
"""
|
252 |
self.desktop.scroll(direction=direction, amount=amount)
|
253 |
+
self.logger.log(self.log_path, f"Scrolled {direction} by {amount}")
|
254 |
return f"Scrolled {direction} by {amount}"
|
255 |
|
256 |
@tool
|
|
|
261 |
seconds: Number of seconds to wait
|
262 |
"""
|
263 |
time.sleep(seconds)
|
264 |
+
self.logger.log(self.log_path, f"Waited for {seconds} seconds")
|
265 |
return f"Waited for {seconds} seconds"
|
266 |
|
267 |
@tool
|
|
|
278 |
self.desktop.open(url)
|
279 |
# Give it time to load
|
280 |
time.sleep(2)
|
281 |
+
self.logger.log(self.log_path, f"Opening URL: {url}")
|
282 |
return f"Opened URL: {url}"
|
283 |
|
284 |
|
|
|
307 |
|
308 |
def take_snapshot_callback(self, memory_step: ActionStep, agent=None) -> None:
|
309 |
"""Callback that takes a screenshot + memory snapshot after a step completes"""
|
310 |
+
self.logger.log(self.log_path, "Analyzing screen content...")
|
|
|
311 |
|
312 |
current_step = memory_step.step_number
|
313 |
print(f"Taking screenshot for step {current_step}")
|