Firoj112 committed on
Commit
dd2cc46
·
verified ·
1 Parent(s): ecedda5

Update tools/scroll_page.py

Browse files
Files changed (1) hide show
  1. tools/scroll_page.py +28 -50
tools/scroll_page.py CHANGED
@@ -1,59 +1,37 @@
1
  from smolagents.tools import Tool
2
- import cv2
3
- import numpy as np
4
- import os
5
- import json
6
 
7
class DetectElementsTool(Tool):
    """Find table-like regions or text boxes in a screenshot with OpenCV.

    The screenshot is converted to grayscale, blurred, and passed through
    Canny edge detection. The bounding rectangle of every external contour
    is then filtered by area and aspect ratio according to the requested
    element type. When something is found, a copy of the screenshot with
    the bounding boxes drawn on it is written next to the original.
    """

    name = "detect_elements"
    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
    # Type names follow the JSON-schema style vocabulary used by smolagents
    # ("string", not "str"). An input that has a default in ``forward`` is
    # flagged as nullable so the declaration and the signature agree.
    inputs = {
        "screenshot_path": {"type": "string", "description": "Path to the screenshot"},
        "element_type": {
            "type": "string",
            "nullable": True,
            "default": "table",
            "description": "Type: 'table' or 'textbox'",
        },
    }
    output_type = "string"

    def forward(self, screenshot_path, element_type="table"):
        """Detect elements of ``element_type`` in the image at ``screenshot_path``.

        Args:
            screenshot_path: Path of the screenshot to analyse.
            element_type: ``"table"`` (area > 10000, aspect ratio between
                0.5 and 2.0) or ``"textbox"`` (area > 500, aspect ratio
                above 2.0). ``None`` falls back to ``"table"``.

        Returns:
            A JSON string with the keys ``"detections"`` (list of
            ``{"type", "bbox": [x, y, w, h]}``) and ``"output_image"``
            (path of the annotated copy), or a plain-text message when
            nothing was detected or the operation failed.
        """
        if element_type is None:
            element_type = "table"
        try:
            if not screenshot_path or not os.path.exists(screenshot_path):
                return f"Screenshot not found: {screenshot_path}"
            if element_type not in ("table", "textbox"):
                return f"Invalid element type: {element_type}"

            # cv2.imread signals an unreadable file by returning None
            # instead of raising, so check before using the image.
            image = cv2.imread(screenshot_path)
            if image is None:
                return f"Failed to detect elements: could not read image {screenshot_path}"

            # Preprocess: grayscale -> blur -> edges.
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
            edges = cv2.Canny(blurred, 50, 150)

            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            detections = []
            for contour in contours:
                x, y, w, h = cv2.boundingRect(contour)
                area = w * h
                aspect_ratio = w / h if h > 0 else 0
                if element_type == "table":
                    # Tables: large and roughly square-ish.
                    matches = area > 10000 and 0.5 < aspect_ratio < 2.0
                else:
                    # Text boxes: smaller and clearly wider than tall.
                    matches = area > 500 and aspect_ratio > 2.0
                if matches:
                    detections.append({"type": element_type, "bbox": [x, y, w, h]})

            if not detections:
                return "No elements detected"

            # Derive the output name from the file extension only, so that a
            # path without ".png" never resolves to the input file itself.
            root, ext = os.path.splitext(screenshot_path)
            output_path = f"{root}_detected{ext or '.png'}"

            # Green boxes for tables, red for text boxes (BGR order).
            color = (0, 255, 0) if element_type == "table" else (0, 0, 255)
            output_image = image.copy()
            for detection in detections:
                x, y, w, h = detection["bbox"]
                cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)
            if not cv2.imwrite(output_path, output_image):
                return f"Failed to detect elements: could not write {output_path}"

            return json.dumps({
                "detections": detections,
                "output_image": output_path,
            })
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Failed to detect elements: {str(e)}"
 
1
  from smolagents.tools import Tool
2
+ from helium import scroll_down, scroll_up, get_driver
3
+ from selenium.webdriver.common.by import By
 
 
4
 
5
class ScrollPageTool(Tool):
    """Scroll the browser page to an element or by a pixel offset.

    All scrolling goes through the WebDriver passed to the constructor, so
    the selector and pixel paths always act on the same browser session.
    """

    name = "scroll_page"
    description = "Scrolls the page to a specific element or by a number of pixels."
    # Type names follow the JSON-schema style vocabulary used by smolagents
    # ("string"/"integer", not "str"/"int"). Every input has a default in
    # ``forward`` and is therefore flagged as nullable.
    inputs = {
        "selector": {
            "type": "string",
            "nullable": True,
            "default": None,
            "description": "CSS selector to scroll to",
        },
        "num_pixels": {
            "type": "integer",
            "nullable": True,
            "default": 1200,
            "description": "Number of pixels to scroll",
        },
        "direction": {
            "type": "string",
            "nullable": True,
            "default": "down",
            "description": "Scroll direction: 'down' or 'up'",
        },
    }
    output_type = "string"

    def __init__(self, driver):
        """Store the Selenium WebDriver used for every scroll operation."""
        super().__init__()
        self.driver = driver

    def forward(self, selector=None, num_pixels=1200, direction="down"):
        """Scroll to ``selector`` if given, otherwise by ``num_pixels``.

        Args:
            selector: CSS selector of the element to bring into view. When
                set, ``num_pixels`` and ``direction`` are ignored.
            num_pixels: Distance to scroll in pixels; ``None`` means 1200.
            direction: ``"down"`` or ``"up"``; ``None`` means ``"down"``.

        Returns:
            A human-readable status message. Failures are reported as text
            rather than raised.
        """
        if num_pixels is None:
            num_pixels = 1200
        if direction is None:
            direction = "down"
        try:
            if selector:
                element = self.driver.find_element(By.CSS_SELECTOR, selector)
                self.driver.execute_script("arguments[0].scrollIntoView(true);", element)
                return f"Scrolled to element with selector {selector}"

            if direction not in ("down", "up"):
                return f"Invalid direction: {direction}"

            # Accept numeric strings as well; a bad value is reported
            # through the except clause below.
            num_pixels = int(num_pixels)
            offset = num_pixels if direction == "down" else -num_pixels
            self.driver.execute_script("window.scrollBy(0, arguments[0]);", offset)
            return f"Scrolled {direction} {num_pixels} pixels"
        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Failed to scroll: {str(e)}"