Firoj112 committed on
Commit
7a89a47
·
verified ·
1 Parent(s): 3064cc7

Create detect_elements.py

Browse files
Files changed (1) hide show
  1. tools/detect_elements.py +69 -0
tools/detect_elements.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents.tools import Tool
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+
6
def detect_elements(screenshot_path, element_type="table"):
    """
    Detect table-like structures or text boxes in a screenshot using OpenCV.

    The image is converted to grayscale, blurred, and passed through Canny
    edge detection. The bounding rectangle of every external contour is then
    kept or discarded based on its area and width/height ratio. A copy of the
    image with the kept rectangles drawn on it is written next to the input
    file, using the input's base name plus a ``_detected`` suffix.

    Args:
        screenshot_path (str): Path to the screenshot
        element_type (str): Type of element to detect ('table', 'textbox')
            (default: 'table'). Any other value matches nothing.

    Returns:
        str: When at least one element is found, a JSON object with the keys
        ``"detections"`` (list of ``{"type", "bbox": [x, y, w, h]}``) and
        ``"output_image"`` (path of the annotated copy). Otherwise the plain
        string ``"No elements detected"``, ``"Screenshot not found: ..."``,
        or ``"Failed to detect elements: ..."``. This function reports
        problems through its return value instead of raising.
    """
    # Imported here because the module's import block does not provide json;
    # without it the json.dumps call below raised NameError on every success.
    import json

    try:
        if not os.path.exists(screenshot_path):
            return f"Screenshot not found: {screenshot_path}"

        # Read and preprocess image
        image = cv2.imread(screenshot_path)
        if image is None:
            # imread signals an unreadable/unsupported file by returning None
            # rather than raising, so report it explicitly.
            return f"Failed to detect elements: could not read image: {screenshot_path}"
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)

        # Detect contours
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        detections = []

        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            area = w * h
            aspect_ratio = w / h if h > 0 else 0

            if element_type == "table":
                # Tables: large and roughly square-ish
                matched = area > 10000 and 0.5 < aspect_ratio < 2.0
            elif element_type == "textbox":
                # Text boxes: smaller, clearly wider than tall
                matched = area > 500 and aspect_ratio > 2.0
            else:
                matched = False

            if matched:
                detections.append({"type": element_type, "bbox": [x, y, w, h]})

        # Build the output path from the file extension only. A plain
        # str.replace(".png", ...) left non-.png paths unchanged, so the
        # annotated image overwrote the original screenshot.
        root, ext = os.path.splitext(screenshot_path)
        output_path = f"{root}_detected{ext or '.png'}"

        # Draw bounding boxes on a copy of the image
        output_image = image.copy()
        for detection in detections:
            x, y, w, h = detection["bbox"]
            color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
            cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
        if not cv2.imwrite(output_path, output_image):
            # imwrite can fail by returning False; do not report a path that
            # was never written.
            return f"Failed to detect elements: could not write image: {output_path}"

        if not detections:
            return "No elements detected"
        return json.dumps({
            "detections": detections,
            "output_image": output_path,
        })
    except Exception as e:
        # Tool boundary: the caller expects a string result, not an exception.
        return f"Failed to detect elements: {str(e)}"
58
+
59
# Register the tool
class DetectElementsTool(Tool):
    """smolagents wrapper that exposes ``detect_elements`` as an agent tool.

    smolagents tools are declared by subclassing ``Tool`` with class-level
    ``name``/``description``/``inputs``/``output_type`` attributes and a
    ``forward`` method. The base-class constructor ignores keyword
    arguments, so ``Tool(name=..., function=...)`` does not produce a
    callable tool.
    """

    name = "detect_elements"
    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
    inputs = {
        "screenshot_path": {"type": "string", "description": "Path to the screenshot"},
        "element_type": {
            "type": "string",
            "description": "Type: 'table' or 'textbox' (default: 'table')",
            # Inputs that have a default in forward() must be marked nullable.
            "nullable": True,
        },
    }
    output_type = "string"

    def forward(self, screenshot_path, element_type="table"):
        """Run the detection; a missing/None element_type means 'table'."""
        return detect_elements(screenshot_path, element_type or "table")


tool = DetectElementsTool()