Firoj112 committed on
Commit
44c942e
·
verified ·
1 Parent(s): 8430937

Update tools/detect_elements.py

Browse files
Files changed (1) hide show
  1. tools/detect_elements.py +53 -71
tools/detect_elements.py CHANGED
@@ -4,74 +4,56 @@ import numpy as np
4
  import os
5
  import json
6
 
7
def detect_elements(screenshot_path, element_type="table"):
    """
    Detect table-like structures or text boxes in a screenshot using OpenCV.

    The image is converted to grayscale, blurred and run through Canny edge
    detection. The bounding rectangle of every external contour is then
    filtered by area and aspect ratio. Matches are drawn on a copy of the
    image, which is saved next to the input with a ``_detected`` suffix.

    Args:
        screenshot_path (str): Path to the screenshot
        element_type (str): Type of element to detect ('table', 'textbox') (default: 'table')

    Returns:
        str: JSON with ``detections`` (a list of ``{"type", "bbox": [x, y, w, h]}``)
        and ``output_image`` (path of the annotated copy) when something was
        found. Otherwise a plain-text message: "No elements detected",
        "Screenshot not found: ..." or "Failed to detect elements: ...".
        Errors are reported through the return value; nothing is raised.
    """
    # Reject unknown types up front; otherwise every contour is filtered out
    # and the caller is told "No elements detected", which hides the typo.
    if element_type not in ("table", "textbox"):
        return (
            f"Failed to detect elements: unsupported element_type "
            f"{element_type!r} (expected 'table' or 'textbox')"
        )

    try:
        if not os.path.exists(screenshot_path):
            return f"Screenshot not found: {screenshot_path}"

        # cv2.imread signals an unreadable/unsupported file by returning None
        # rather than raising, so check explicitly for a clear message.
        image = cv2.imread(screenshot_path)
        if image is None:
            return f"Failed to detect elements: could not read image: {screenshot_path}"

        # Preprocess: grayscale -> blur (noise suppression) -> edge map
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)

        # Only outermost contours are considered
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        detections = []

        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            area = w * h
            aspect_ratio = w / h if h > 0 else 0

            # Tables: large and roughly square-ish
            if element_type == "table" and area > 10000 and 0.5 < aspect_ratio < 2.0:
                detections.append({"type": "table", "bbox": [x, y, w, h]})
            # Text boxes: smaller and clearly wider than tall
            elif element_type == "textbox" and area > 500 and aspect_ratio > 2.0:
                detections.append({"type": "textbox", "bbox": [x, y, w, h]})

        # Nothing found: do not leave behind an annotated file whose path is
        # never reported to the caller.
        if not detections:
            return "No elements detected"

        # Derive the output name from the real extension. A plain
        # str.replace(".png", ...) is a no-op for e.g. ".jpg"/".PNG" inputs
        # and would make imwrite overwrite the original screenshot.
        root, ext = os.path.splitext(screenshot_path)
        output_path = f"{root}_detected{ext or '.png'}"

        # Draw bounding boxes on a copy of the image (BGR: green = table, red = textbox)
        output_image = image.copy()
        for detection in detections:
            x, y, w, h = detection["bbox"]
            color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
            cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)

        # imwrite reports failure via its boolean result
        if not cv2.imwrite(output_path, output_image):
            return f"Failed to detect elements: could not write annotated image: {output_path}"

        return json.dumps({
            "detections": detections,
            "output_image": output_path
        })
    except Exception as e:
        return f"Failed to detect elements: {str(e)}"
59
-
60
# Register detect_elements as a Tool named "detect_elements".
# Both inputs are declared as strings; element_type defaults to "table".
detect_elements_tool = Tool(
    name="detect_elements",
    description="Detects table-like structures or text boxes in a screenshot using OpenCV.",
    inputs=dict(
        screenshot_path=dict(type="str", description="Path to the screenshot"),
        element_type=dict(type="str", default="table", description="Type: 'table' or 'textbox'"),
    ),
    output_type="str",
    function=detect_elements,
)
 
4
  import os
5
  import json
6
 
7
class DetectElementsTool(Tool):
    """Tool that finds table-like regions or text boxes in a screenshot with OpenCV.

    Detection is purely geometric: Canny edges -> external contours ->
    bounding rectangles filtered by area and aspect ratio.
    """

    # NOTE(review): if `Tool` is smolagents' Tool, it may expect type names
    # such as "string" and a `forward` signature whose parameters mirror the
    # keys of `inputs` - confirm against the framework version in use.
    name = "detect_elements"
    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
    inputs = {
        "screenshot_path": {"type": "str", "description": "Path to the screenshot"},
        "element_type": {"type": "str", "default": "table", "description": "Type: 'table' or 'textbox'"}
    }
    output_type = "str"

    def forward(self, **kwargs):
        """
        Run the detection and return the result as a string.

        Keyword Args:
            screenshot_path (str): Path to the screenshot (required).
            element_type (str): 'table' or 'textbox' (default: 'table').

        Returns:
            str: JSON with ``detections`` (a list of ``{"type", "bbox": [x, y, w, h]}``)
            and ``output_image`` (path of the annotated copy) when something
            was found. Otherwise a plain-text message: "No elements detected",
            "Screenshot not found: ..." or "Failed to detect elements: ...".
            Errors are reported through the return value; nothing is raised.
        """
        screenshot_path = kwargs.get("screenshot_path")
        element_type = kwargs.get("element_type", "table")

        # A missing path would otherwise surface as an opaque TypeError text.
        if not screenshot_path:
            return "Failed to detect elements: screenshot_path is required"

        # Reject unknown types up front; otherwise every contour is filtered
        # out and the caller is told "No elements detected".
        if element_type not in ("table", "textbox"):
            return (
                f"Failed to detect elements: unsupported element_type "
                f"{element_type!r} (expected 'table' or 'textbox')"
            )

        try:
            if not os.path.exists(screenshot_path):
                return f"Screenshot not found: {screenshot_path}"

            # cv2.imread signals an unreadable/unsupported file by returning
            # None rather than raising, so check explicitly.
            image = cv2.imread(screenshot_path)
            if image is None:
                return f"Failed to detect elements: could not read image: {screenshot_path}"

            # Preprocess: grayscale -> blur (noise suppression) -> edge map
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
            edges = cv2.Canny(blurred, 50, 150)

            # Only outermost contours are considered
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
            detections = []

            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                area = w * h
                aspect_ratio = w / h if h > 0 else 0

                # Tables: large and roughly square-ish
                if element_type == "table" and area > 10000 and 0.5 < aspect_ratio < 2.0:
                    detections.append({"type": "table", "bbox": [x, y, w, h]})
                # Text boxes: smaller and clearly wider than tall
                elif element_type == "textbox" and area > 500 and aspect_ratio > 2.0:
                    detections.append({"type": "textbox", "bbox": [x, y, w, h]})

            # Nothing found: do not leave behind an annotated file whose path
            # is never reported to the caller.
            if not detections:
                return "No elements detected"

            # Derive the output name from the real extension. A plain
            # str.replace(".png", ...) is a no-op for e.g. ".jpg"/".PNG"
            # inputs and would make imwrite overwrite the original screenshot.
            root, ext = os.path.splitext(screenshot_path)
            output_path = f"{root}_detected{ext or '.png'}"

            # Draw bounding boxes on a copy (BGR: green = table, red = textbox)
            output_image = image.copy()
            for detection in detections:
                x, y, w, h = detection["bbox"]
                color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
                cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)

            # imwrite reports failure via its boolean result
            if not cv2.imwrite(output_path, output_image):
                return f"Failed to detect elements: could not write annotated image: {output_path}"

            return json.dumps({
                "detections": detections,
                "output_image": output_path
            })
        except Exception as e:
            return f"Failed to detect elements: {str(e)}"