Spaces:

katiue
/

browser-use-webui

Running

App Files Community

katiue committed on Jan 5

Commit

1ce41fe

verified ·

1 Parent(s): d908334

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

webui.py +79 -64

webui.py CHANGED Viewed

@@ -132,9 +132,10 @@ async def run_org_agent(
             model_thoughts = history.model_thoughts()
         recorded_files = get_latest_files(save_recording_path)
         await browser.close()
-        return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), recorded_files.get('.zip')
     else:
         # Reuse existing context
         agent = Agent(
@@ -149,7 +150,8 @@ async def run_org_agent(
         model_actions = history.model_actions()
         model_thoughts = history.model_thoughts()
         recorded_files = get_latest_files(save_recording_path)
-        return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), recorded_files.get('.zip')
 async def run_custom_agent(
@@ -209,7 +211,8 @@ async def run_custom_agent(
             model_actions = history.model_actions()
             model_thoughts = history.model_thoughts()
             recorded_files = get_latest_files(save_recording_path)
-            return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), recorded_files.get('.zip')
         else:
             browser = CustomBrowser(
                 config=BrowserConfig(
@@ -301,7 +304,7 @@ async def run_with_stream(*args):
                     html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
                 yield [html_content, final_result, errors, model_actions, model_thoughts, recording, trace]
-                await asyncio.sleep(0.2)
             # Get agent results when done
             try:
@@ -343,65 +346,77 @@ def main():
     # Gradio UI setup
     with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
         gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
-        with gr.Row():
-            agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
-            max_steps = gr.Number(label="max run steps", value=100)
-            use_vision = gr.Checkbox(label="use vision", value=True)
-        with gr.Row():
-            llm_provider = gr.Dropdown(
-                ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini"
-            )
-            llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
-            llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
-        with gr.Row():
-            llm_base_url = gr.Textbox(label="LLM Base URL")
-            llm_api_key = gr.Textbox(label="LLM API Key", type="password")
-        with gr.Accordion("Browser Settings", open=False):
-            use_own_browser = gr.Checkbox(label="Use Own Browser", value=False)
-            headless = gr.Checkbox(label="Headless", value=False)
-            disable_security = gr.Checkbox(label="Disable Security", value=True)
-            with gr.Row():
-                window_w = gr.Number(label="Window Width", value=1920)
-                window_h = gr.Number(label="Window Height", value=1080)
-            save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. ./tmp/record_videos",
-                                             value="./tmp/record_videos")
-        with gr.Accordion("Task Settings", open=True):
-            task = gr.Textbox(label="Task", lines=10,
-                              value="go to google.com and type 'OpenAI' click search and give me the first url")
-            add_infos = gr.Textbox(label="Additional Infos(Optional): Hints to help LLM complete Task", lines=5)
-        run_button = gr.Button("Run Agent", variant="primary")
-        with gr.Column():
-            # Add live stream viewer before other components
-            browser_view = gr.HTML(
-                label="Live Browser View",
-                value="<div style='width:100%; height:600px; border:1px solid #ccc; display:flex; align-items:center; justify-content:center;'><p>Waiting for browser session...</p></div>"
-            )
-            final_result_output = gr.Textbox(label="Final Result", lines=5)
-            errors_output = gr.Textbox(label="Errors", lines=5)
-            model_actions_output = gr.Textbox(label="Model Actions", lines=5)
-            model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5)
-            with gr.Row():
-                recording_file = gr.Video(label="Recording File")  # Changed from gr.File to gr.Video
-                trace_file = gr.File(label="Trace File (ZIP)")
-        # Add a refresh button
-        refresh_button = gr.Button("Refresh Files")
-        def refresh_files():
-            recorded_files = get_latest_files("./tmp/record_videos")
-            return (
-                recorded_files.get('.webm') if recorded_files.get('.webm') else None,
-                recorded_files.get('.zip') if recorded_files.get('.zip') else None
-            )
-        refresh_button.click(
-            fn=refresh_files,
-            inputs=[],
-            outputs=[recording_file, trace_file]
-        )
         run_button.click(
             fn=run_with_stream,
             inputs=[
@@ -422,7 +437,7 @@ def main():
                 max_steps,
                 use_vision
             ],
-            outputs=[  # Change from dict to list
                 browser_view,
                 final_result_output,
                 errors_output,
@@ -441,8 +456,8 @@ if __name__ == "__main__":
     # For local development
     import argparse
     parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
-    parser.add_argument("--ip", type=str, default="127.0.0.1", help="IP address to bind to")
-    parser.add_argument("--port", type=int, default=7788, help="Port to listen on")
     args = parser.parse_args()
     main()
 else:

             model_thoughts = history.model_thoughts()
         recorded_files = get_latest_files(save_recording_path)
+        trace_file = get_latest_files(save_recording_path + "/../traces")
         await browser.close()
+        return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
     else:
         # Reuse existing context
         agent = Agent(
         model_actions = history.model_actions()
         model_thoughts = history.model_thoughts()
         recorded_files = get_latest_files(save_recording_path)
+        trace_file = get_latest_files(save_recording_path + "/../traces")
+        return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
 async def run_custom_agent(
             model_actions = history.model_actions()
             model_thoughts = history.model_thoughts()
             recorded_files = get_latest_files(save_recording_path)
+            trace_file = get_latest_files(save_recording_path + "/../traces")
+            return final_result, errors, model_actions, model_thoughts, recorded_files.get('.webm'), trace_file.get('.zip')
         else:
             browser = CustomBrowser(
                 config=BrowserConfig(
                     html_content = f"<div class='error'>Screenshot error: {str(e)}</div>"
                 yield [html_content, final_result, errors, model_actions, model_thoughts, recording, trace]
+                await asyncio.sleep(0.01)
             # Get agent results when done
             try:
     # Gradio UI setup
     with gr.Blocks(title="Browser Use WebUI", theme=gr.themes.Soft(font=[gr.themes.GoogleFont("Plus Jakarta Sans")])) as demo:
         gr.Markdown("<center><h1>Browser Use WebUI</h1></center>")
+        with gr.Tabs():
+            # Tab for LLM Settings
+            with gr.Tab("LLM Settings"):
+                with gr.Row():
+                    llm_provider = gr.Dropdown(
+                        ["anthropic", "openai", "gemini", "azure_openai", "deepseek"], label="LLM Provider", value="gemini"
+                    )
+                    llm_model_name = gr.Textbox(label="LLM Model Name", value="gemini-2.0-flash-exp")
+                    llm_temperature = gr.Number(label="LLM Temperature", value=1.0)
+                with gr.Row():
+                    llm_base_url = gr.Textbox(label="LLM Base URL")
+                    llm_api_key = gr.Textbox(label="LLM API Key", type="password")
+            # Tab for Browser Settings
+            with gr.Tab("Browser Settings"):
+                with gr.Accordion("Browser Settings", open=True):
+                    use_own_browser = gr.Checkbox(label="Use Own Browser", value=False)
+                    headless = gr.Checkbox(label="Headless", value=False)
+                    disable_security = gr.Checkbox(label="Disable Security", value=True)
+                    with gr.Row():
+                        window_w = gr.Number(label="Window Width", value=1920)
+                        window_h = gr.Number(label="Window Height", value=1080)
+                    save_recording_path = gr.Textbox(label="Save Recording Path", placeholder="e.g. ./tmp/record_videos",
+                                                     value="./tmp/record_videos")
+            # Tab for Task Settings
+            with gr.Tab("Task Settings"):
+                with gr.Accordion("Task Settings", open=True):
+                    task = gr.Textbox(label="Task", lines=10,
+                                      value="go to google.com and type 'OpenAI' click search and give me the first url")
+                    add_infos = gr.Textbox(label="Additional Infos (Optional): Hints to help LLM complete Task", lines=5)
+                    agent_type = gr.Radio(["org", "custom"], label="Agent Type", value="custom")
+                    max_steps = gr.Number(label="Max Run Steps", value=100)
+                    use_vision = gr.Checkbox(label="Use Vision", value=True)
+            # Tab for Stream + File Download and Agent Thoughts
+            with gr.Tab("Results"):
+                with gr.Column():
+                    # Add live stream viewer before other components
+                    browser_view = gr.HTML(
+                        label="Live Browser View",
+                        value="<div style='width:100%; height:600px; border:1px solid #ccc; display:flex; align-items:center; justify-content:center;'><p>Waiting for browser session...</p></div>"
+                    )
+                    final_result_output = gr.Textbox(label="Final Result", lines=5)
+                    errors_output = gr.Textbox(label="Errors", lines=5)
+                    model_actions_output = gr.Textbox(label="Model Actions", lines=5)
+                    model_thoughts_output = gr.Textbox(label="Model Thoughts", lines=5)
+                    with gr.Row():
+                        recording_file = gr.Video(label="Recording File")  # Changed from gr.File to gr.Video
+                        trace_file = gr.File(label="Trace File (ZIP)")
+                    # Add a refresh button
+                    refresh_button = gr.Button("Refresh Files")
+                    def refresh_files():
+                        recorded_files = get_latest_files("./tmp/record_videos")
+                        trace_file = get_latest_files("./tmp/traces")
+                        return (
+                            recorded_files.get('.webm') if recorded_files.get('.webm') else None,
+                            trace_file.get('.zip') if trace_file.get('.zip') else None
+                        )
+                    refresh_button.click(
+                        fn=refresh_files,
+                        inputs=[],
+                        outputs=[recording_file, trace_file]
+                    )
+        # Run button outside tabs for global execution
+        run_button = gr.Button("Run Agent", variant="primary")
         run_button.click(
             fn=run_with_stream,
             inputs=[
                 max_steps,
                 use_vision
             ],
+            outputs=[
                 browser_view,
                 final_result_output,
                 errors_output,
     # For local development
     import argparse
     parser = argparse.ArgumentParser(description="Gradio UI for Browser Agent")
+    parser.add_argument("--ip", type=str, default="0.0.0.0", help="IP address to bind to")
+    parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
     args = parser.parse_args()
     main()
 else: