Spaces:

Firoj112
/

WebAgents_

Running

App Files Files Community

Firoj112 committed 11 days ago

Commit

80e17c9

verified ·

1 Parent(s): 5be8e67

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -7

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import gradio as gr
-from smolagents import CodeAgent, LiteLLMModel, tool, GradioUI
 from smolagents.agents import ActionStep
 import helium
 from selenium import webdriver
@@ -99,7 +99,7 @@ def save_screenshot(memory_step: ActionStep, agent: CodeAgent) -> Image.Image:
         # Save new screenshot
         png_bytes = driver.get_screenshot_as_png()
         image = Image.open(BytesIO(png_bytes))
-        screenshot_dir = os.path.join(tempfile.gettempdir(), "web_agent_sScreenshots")
         os.makedirs(screenshot_dir, exist_ok=True)
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         screenshot_filename = f"screenshot_step_{current_step}_{timestamp}.png"
@@ -169,7 +169,7 @@ def run_agent_chat(user_input: str, history: list):
             url = user_input.split()[0] if user_input.startswith("http") else next((w for w in user_input.split() if w.startswith("http")), "")
             request = user_input.replace(url, "").strip() or "Navigate to the URL and describe the page."
         else:
-            url = "https://example.com"  # Default URL if none provided
             request = user_input
         search_request = f"Please go to {url}. {request}"
@@ -198,8 +198,21 @@ def run_agent_chat(user_input: str, history: list):
         except:
             logger.warning("Failed to close Chrome driver.")
-# Launch GradioUI (Option 1: Preferred)
 if __name__ == "__main__":
-    file_upload_folder = os.path.join(tempfile.gettempdir(), "web_agent_uploads")
-    os.makedirs(file_upload_folder, exist_ok=True)
-    GradioUI(agent=agent, file_upload_folder=file_upload_folder).launch()

 import os
 import gradio as gr
+from smolagents import CodeAgent, LiteLLMModel, tool
 from smolagents.agents import ActionStep
 import helium
 from selenium import webdriver
         # Save new screenshot
         png_bytes = driver.get_screenshot_as_png()
         image = Image.open(BytesIO(png_bytes))
+        screenshot_dir = os.path.join(tempfile.gettempdir(), "web_agent_screenshots")
         os.makedirs(screenshot_dir, exist_ok=True)
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         screenshot_filename = f"screenshot_step_{current_step}_{timestamp}.png"
             url = user_input.split()[0] if user_input.startswith("http") else next((w for w in user_input.split() if w.startswith("http")), "")
             request = user_input.replace(url, "").strip() or "Navigate to the URL and describe the page."
         else:
+            url = "https://example.com"
             request = user_input
         search_request = f"Please go to {url}. {request}"
         except:
             logger.warning("Failed to close Chrome driver.")
+# Custom Gradio interface
+def process_input(user_input, history):  # UI callback: returns (chat history, value for the image output)
+    if not user_input.strip():  # empty or whitespace-only message: do not call the agent
+        return history, None  # history is passed back unchanged; None goes to the second output
+    output, latest_screenshot = run_agent_chat(user_input, history)  # run_agent_chat is expected to return a 2-tuple
+    new_history = history + [[user_input, output]]  # new list with one [user message, agent output] pair appended; NOTE(review): raises TypeError if history is None - confirm the caller always passes a list
+    return new_history, latest_screenshot
 if __name__ == "__main__":  # build and launch the UI only when run as a script
+    with gr.Blocks() as demo:  # components and event bindings are declared inside this context
+        gr.Markdown("# Web Navigation Agent")
+        chatbot = gr.Chatbot(label="Chat")  # used as both an input (history) and an output of process_input
+        msg = gr.Textbox(placeholder="Enter URL and request (e.g., https://github.com/trending Click on Developers)")
+        btn = gr.Button("Send")
+        image = gr.Image(label="Latest Screenshot")  # receives the second value returned by process_input
+        btn.click(process_input, inputs=[msg, chatbot], outputs=[chatbot, image])  # button click and textbox submit share the same handler and wiring
+        msg.submit(process_input, inputs=[msg, chatbot], outputs=[chatbot, image])  # NOTE(review): msg is not among the outputs, so the handler does not clear the textbox
+    demo.launch()