Spaces:

Firoj112
/

WebAgents_

Running

App Files Files Community

Firoj112 committed 9 days ago

Commit

dd2cc46

verified ·

1 Parent(s): ecedda5

Update tools/scroll_page.py

Browse files

Files changed (1) hide show

tools/scroll_page.py +28 -50

tools/scroll_page.py CHANGED Viewed

@@ -1,59 +1,37 @@
 from smolagents.tools import Tool
-import cv2
-import numpy as np
-import os
-import json
class DetectElementsTool(Tool):
    """Agent tool that finds table-like regions or text boxes in a screenshot.

    The screenshot is edge-detected with OpenCV, the external contours are
    reduced to bounding rectangles, and the rectangles are filtered by area
    and aspect ratio. An annotated copy of the screenshot is written next to
    the original.
    """

    name = "detect_elements"
    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
    # Type names follow the smolagents schema ("string", not "str"); an input
    # whose forward() parameter has a default must be flagged nullable.
    inputs = {
        "screenshot_path": {
            "type": "string",
            "description": "Path to the screenshot",
        },
        "element_type": {
            "type": "string",
            "nullable": True,
            "default": "table",
            "description": "Type: 'table' or 'textbox'",
        },
    }
    output_type = "string"

    def forward(self, screenshot_path, element_type="table"):
        """Detect elements of ``element_type`` in the image at ``screenshot_path``.

        Args:
            screenshot_path: Path of the screenshot image to analyse.
            element_type: ``"table"`` (area > 10000, aspect ratio between 0.5
                and 2.0) or ``"textbox"`` (area > 500, aspect ratio > 2.0).
                Any other value matches nothing.

        Returns:
            A JSON string with ``detections`` (a list of
            ``{"type", "bbox": [x, y, w, h]}``) and ``output_image`` (path of
            the annotated copy), or a plain message when the screenshot is
            missing, nothing is detected, or processing fails.
        """
        try:
            if not os.path.exists(screenshot_path):
                return f"Screenshot not found: {screenshot_path}"

            image = cv2.imread(screenshot_path)
            # imread signals an unreadable or unsupported file by returning
            # None instead of raising.
            if image is None:
                return f"Failed to detect elements: could not read image {screenshot_path}"

            # Grayscale -> blur -> Canny edges, then outer contours only.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
            edges = cv2.Canny(blurred, 50, 150)
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            detections = []
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                area = w * h
                aspect_ratio = w / h if h > 0 else 0
                # Tables: large and roughly square.
                if element_type == "table" and area > 10000 and 0.5 < aspect_ratio < 2.0:
                    detections.append({"type": "table", "bbox": [x, y, w, h]})
                # Text boxes: smaller and clearly wider than tall.
                elif element_type == "textbox" and area > 500 and aspect_ratio > 2.0:
                    detections.append({"type": "textbox", "bbox": [x, y, w, h]})

            # Build the output name from the extension only, so that a
            # non-".png" screenshot is never overwritten and a ".png" inside
            # a directory name is left alone.
            root, ext = os.path.splitext(screenshot_path)
            output_path = f"{root}_detected{ext or '.png'}"

            # Draw on a copy; colours are BGR (green = table, red = textbox).
            output_image = image.copy()
            for detection in detections:
                x, y, w, h = detection["bbox"]
                color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
                cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
            # imwrite reports failure through its return value.
            if not cv2.imwrite(output_path, output_image):
                return f"Failed to detect elements: could not write {output_path}"

            if not detections:
                return "No elements detected"
            return json.dumps({
                "detections": detections,
                "output_image": output_path,
            })
        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            return f"Failed to detect elements: {str(e)}"

 from smolagents.tools import Tool
+from helium import scroll_down, scroll_up, get_driver
+from selenium.webdriver.common.by import By
class ScrollPageTool(Tool):
    """Agent tool that scrolls the current browser page.

    Either brings the first element matching a CSS selector into view, or
    scrolls the window up or down by a number of pixels.
    """

    name = "scroll_page"
    description = "Scrolls the page to a specific element or by a number of pixels."
    # Type names follow the smolagents schema ("string"/"integer", not
    # "str"/"int"). Every input is optional, so each is flagged nullable to
    # match the defaults declared on forward().
    inputs = {
        "selector": {
            "type": "string",
            "nullable": True,
            "default": None,
            "description": "CSS selector to scroll to",
        },
        "num_pixels": {
            "type": "integer",
            "nullable": True,
            "default": 1200,
            "description": "Number of pixels to scroll",
        },
        "direction": {
            "type": "string",
            "nullable": True,
            "default": "down",
            "description": "Scroll direction: 'down' or 'up'",
        },
    }
    output_type = "string"

    def __init__(self, driver):
        """Create the tool around an existing Selenium WebDriver.

        Args:
            driver: WebDriver used to locate elements and run the
                scroll-into-view script.
        """
        # Run the base initialiser so the framework state that Tool sets up
        # (e.g. its initialisation flag) exists on this instance.
        super().__init__()
        self.driver = driver

    def forward(self, selector=None, num_pixels=1200, direction="down"):
        """Scroll to ``selector`` if given, otherwise by ``num_pixels``.

        Args:
            selector: CSS selector of the element to bring into view. When
                truthy it takes precedence and the other arguments are
                ignored.
            num_pixels: Distance to scroll when no selector is given.
            direction: ``"down"`` or ``"up"``.

        Returns:
            A message describing what was scrolled, ``"Invalid direction:
            ..."`` for an unknown direction, or ``"Failed to scroll: ..."``
            if the browser call raised.
        """
        try:
            if selector:
                element = self.driver.find_element(By.CSS_SELECTOR, selector)
                self.driver.execute_script("arguments[0].scrollIntoView(true);", element)
                return f"Scrolled to element with selector {selector}"

            # NOTE(review): helium's scroll helpers act on helium's own active
            # driver, not on self.driver — assumes both are the same browser
            # session; confirm against the caller.
            if direction == "down":
                scroll_down(num_pixels)
                return f"Scrolled down {num_pixels} pixels"
            if direction == "up":
                scroll_up(num_pixels)
                return f"Scrolled up {num_pixels} pixels"
            return f"Invalid direction: {direction}"
        except Exception as e:
            # Tool boundary: report the failure to the agent as text.
            return f"Failed to scroll: {str(e)}"