Spaces:

Firoj112
/

WebAgents_

Running

App Files Community

Firoj112 committed on 9 days ago

Commit

44c942e

verified ·

1 Parent(s): 8430937

Update tools/detect_elements.py

Browse files

Files changed (1) hide show

tools/detect_elements.py +53 -71

tools/detect_elements.py CHANGED Viewed

@@ -4,74 +4,56 @@ import numpy as np
 import os
 import json
-def detect_elements(screenshot_path, element_type="table"):
-    """
-    Detect table-like structures or text boxes in a screenshot using OpenCV.
-    Args:
-        screenshot_path (str): Path to the screenshot
-        element_type (str): Type of element to detect ('table', 'textbox') (default: 'table')
-    Returns:
-        str: JSON with bounding boxes and detection details
-    """
-    try:
-        if not os.path.exists(screenshot_path):
-            return f"Screenshot not found: {screenshot_path}"
-        # Read and preprocess image
-        image = cv2.imread(screenshot_path)
-        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
-        edges = cv2.Canny(blurred, 50, 150)
-        # Detect contours
-        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
-        detections = []
-        for contour in contours:
-            x, y, w, h = cv2.boundingRect(contour)
-            area = w * h
-            aspect_ratio = w / h if h > 0 else 0
-            # Filter for tables (rectangular, large area)
-            if element_type == "table" and area > 10000 and 0.5 < aspect_ratio < 2.0:
-                detections.append({"type": "table", "bbox": [x, y, w, h]})
-            # Filter for text boxes (narrow, horizontal)
-            elif element_type == "textbox" and area > 500 and aspect_ratio > 2.0:
-                detections.append({"type": "textbox", "bbox": [x, y, w, h]})
-        # Draw bounding boxes on a copy of the image
-        output_path = screenshot_path.replace(".png", "_detected.png")
-        output_image = image.copy()
-        for detection in detections:
-            x, y, w, h = detection["bbox"]
-            color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
-            cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
-        cv2.imwrite(output_path, output_image)
-        return json.dumps({
-            "detections": detections,
-            "output_image": output_path
-        }) if detections else "No elements detected"
-    except Exception as e:
-        return f"Failed to detect elements: {str(e)}"
-# Register the tool
-detect_elements_tool = Tool(
-    name="detect_elements",
-    description="Detects table-like structures or text boxes in a screenshot using OpenCV.",
-    inputs={
-        "screenshot_path": {
-            "type": "str",
-            "description": "Path to the screenshot"
-        },
-        "element_type": {
-            "type": "str",
-            "default": "table",
-            "description": "Type: 'table' or 'textbox'"
-        }
-    },
-    output_type="str",
-    function=detect_elements
-)

 import os
 import json
+class DetectElementsTool(Tool):
+    name = "detect_elements"
+    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
+    inputs = {
+        "screenshot_path": {"type": "str", "description": "Path to the screenshot"},
+        "element_type": {"type": "str", "default": "table", "description": "Type: 'table' or 'textbox'"}
+    }
+    output_type = "str"
+    def forward(self, **kwargs):
+        screenshot_path = kwargs.get("screenshot_path")
+        element_type = kwargs.get("element_type", "table")
+        try:
+            if not os.path.exists(screenshot_path):
+                return f"Screenshot not found: {screenshot_path}"
+            # Read and preprocess image
+            image = cv2.imread(screenshot_path)
+            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
+            edges = cv2.Canny(blurred, 50, 150)
+            # Detect contours
+            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            detections = []
+            for contour in contours:
+                x, y, w, h = cv2.boundingRect(contour)
+                area = w * h
+                aspect_ratio = w / h if h > 0 else 0
+                # Filter for tables (rectangular, large area)
+                if element_type == "table" and area > 10000 and 0.5 < aspect_ratio < 2.0:
+                    detections.append({"type": "table", "bbox": [x, y, w, h]})
+                # Filter for text boxes (narrow, horizontal)
+                elif element_type == "textbox" and area > 500 and aspect_ratio > 2.0:
+                    detections.append({"type": "textbox", "bbox": [x, y, w, h]})
+            # Draw bounding boxes on a copy of the image
+            output_path = screenshot_path.replace(".png", "_detected.png")
+            output_image = image.copy()
+            for detection in detections:
+                x, y, w, h = detection["bbox"]
+                color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
+                cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
+            cv2.imwrite(output_path, output_image)
+            return json.dumps({
+                "detections": detections,
+                "output_image": output_path
+            }) if detections else "No elements detected"
+        except Exception as e:
+            return f"Failed to detect elements: {str(e)}"