Firoj112 committed on
Commit
ecedda5
·
verified ·
1 Parent(s): 44c942e

Update tools/scroll_page.py

Browse files
Files changed (1) hide show
  1. tools/scroll_page.py +58 -57
tools/scroll_page.py CHANGED
@@ -1,58 +1,59 @@
1
  from smolagents.tools import Tool
2
- from helium import scroll_down, scroll_up, get_driver
3
- from selenium.webdriver.common.by import By
4
-
5
def scroll_page(driver, selector=None, num_pixels=1200, direction="down"):
    """Scroll the page to a specific element or by a number of pixels.

    All scrolling is performed through ``driver`` itself, so the browser that
    is scrolled is always the one the caller passed in. When ``driver`` is
    ``None`` the function falls back to helium's current driver
    (``get_driver()``).

    Args:
        driver: Selenium WebDriver instance, or ``None`` to use helium's
            current driver.
        selector (str): CSS selector to scroll to (optional). When given,
            ``num_pixels`` and ``direction`` are ignored.
        num_pixels (int): Number of pixels to scroll (default: 1200).
        direction (str): Scroll direction, ``'down'`` or ``'up'``
            (default: ``'down'``).

    Returns:
        str: Human-readable result of the scroll action. Failures are
        reported as a ``"Failed to scroll: ..."`` message instead of being
        raised, so the caller always receives a string.
    """
    try:
        # Reject a bad direction before touching the browser at all.
        if not selector and direction not in ("down", "up"):
            return f"Invalid direction: {direction}"

        if driver is None:
            driver = get_driver()

        if selector:
            element = driver.find_element(By.CSS_SELECTOR, selector)
            driver.execute_script("arguments[0].scrollIntoView(true);", element)
            return f"Scrolled to element with selector {selector}"

        # Scrolling up is a negative vertical offset for window.scrollBy.
        offset = num_pixels if direction == "down" else -num_pixels
        driver.execute_script("window.scrollBy(0, arguments[0]);", offset)
        return f"Scrolled {direction} {num_pixels} pixels"
    except Exception as e:
        # Deliberate catch-all: every failure is turned into a message.
        return f"Failed to scroll: {str(e)}"
34
-
35
# Register the tool: wrap scroll_page in a Tool object, describing each
# argument (type, default, description) and the string result.
# NOTE(review): the type names "str"/"int" and the `function=` keyword are
# passed through as-is; confirm they match what the installed smolagents
# Tool class accepts.
scroll_page_tool = Tool(
    name="scroll_page",
    description="Scrolls the page to a specific element or by a number of pixels.",
    inputs=dict(
        selector=dict(
            type="str",
            default=None,
            description="CSS selector to scroll to",
        ),
        num_pixels=dict(
            type="int",
            default=1200,
            description="Number of pixels to scroll",
        ),
        direction=dict(
            type="str",
            default="down",
            description="Scroll direction: 'down' or 'up'",
        ),
    ),
    output_type="str",
    function=scroll_page,
)
 
 
1
  from smolagents.tools import Tool
2
+ import cv2
3
+ import numpy as np
4
+ import os
5
+ import json
6
+
7
class DetectElementsTool(Tool):
    """Detect table-like or textbox-like rectangles in a screenshot.

    The screenshot is converted to grayscale, blurred and run through Canny
    edge detection; the bounding box of every external contour is then
    filtered by area and aspect ratio according to the requested element
    type. Matching boxes are drawn on a copy of the image that is saved next
    to the original with a ``_detected`` suffix.
    """

    name = "detect_elements"
    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
    # smolagents validates the schema: types use its own names ("string",
    # not "str") and an input whose forward() parameter has a default must be
    # marked nullable.
    inputs = {
        "screenshot_path": {"type": "string", "description": "Path to the screenshot"},
        "element_type": {
            "type": "string",
            "default": "table",
            "nullable": True,
            "description": "Type: 'table' or 'textbox'",
        },
    }
    output_type = "string"

    def forward(self, screenshot_path: str, element_type: str = "table") -> str:
        """Run the detection and report the result as a string.

        Args:
            screenshot_path: Path to the screenshot image on disk.
            element_type: ``'table'`` (area > 10000, aspect ratio between 0.5
                and 2.0) or ``'textbox'`` (area > 500, aspect ratio > 2.0).
                ``None`` is treated as ``'table'``.

        Returns:
            A JSON string with ``detections`` (list of ``{"type", "bbox"}``
            where ``bbox`` is ``[x, y, w, h]``) and ``output_image`` (path of
            the annotated copy) when something was found; otherwise a plain
            message (``"No elements detected"``, ``"Screenshot not found:
            ..."``, ``"Invalid element_type: ..."`` or ``"Failed to detect
            elements: ..."``). Errors are never raised to the caller.
        """
        element_type = element_type or "table"
        if element_type not in ("table", "textbox"):
            # Previously an unknown type was indistinguishable from "nothing found".
            return f"Invalid element_type: {element_type}"

        try:
            if not screenshot_path or not os.path.exists(screenshot_path):
                return f"Screenshot not found: {screenshot_path}"

            # Read and preprocess image. imread signals failure by returning
            # None rather than raising, so check before using the result.
            image = cv2.imread(screenshot_path)
            if image is None:
                return f"Failed to detect elements: could not read image {screenshot_path}"
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
            edges = cv2.Canny(blurred, 50, 150)

            # Detect contours. OpenCV 3 returns (image, contours, hierarchy)
            # and OpenCV 4 returns (contours, hierarchy); [-2] picks the
            # contour list in both.
            contours = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

            detections = []
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                area = w * h
                aspect_ratio = w / h if h > 0 else 0

                if element_type == "table":
                    # Tables: large and roughly square-ish.
                    matched = area > 10000 and 0.5 < aspect_ratio < 2.0
                else:
                    # Text boxes: smaller, clearly wider than tall.
                    matched = area > 500 and aspect_ratio > 2.0
                if matched:
                    detections.append({"type": element_type, "bbox": [x, y, w, h]})

            if not detections:
                # Nothing to annotate, so do not leave an unreferenced file behind.
                return "No elements detected"

            # Build the output name from the extension only, so non-.png
            # inputs are never overwritten and directory names containing
            # ".png" are left untouched.
            root, ext = os.path.splitext(screenshot_path)
            output_path = f"{root}_detected{ext or '.png'}"

            # Draw bounding boxes on a copy of the image (BGR colours:
            # green for tables, red for text boxes).
            output_image = image.copy()
            for detection in detections:
                x, y, w, h = detection["bbox"]
                color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
                cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
            # imwrite reports failure through its return value.
            if not cv2.imwrite(output_path, output_image):
                return f"Failed to detect elements: could not write {output_path}"

            return json.dumps({
                "detections": detections,
                "output_image": output_path,
            })
        except Exception as e:
            # Deliberate catch-all: the tool always answers with a string.
            return f"Failed to detect elements: {str(e)}"