File size: 2,212 Bytes
e9ed5be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from smolagents.tools import Tool
from helium import S
from selenium.webdriver.common.by import By
import json

def _table_rows(table):
    """Return the non-empty rows of *table* as lists of stripped cell texts."""
    rows = []
    for row in table.find_elements(By.TAG_NAME, "tr"):
        # Select header and data cells together so a row that mixes <th> and
        # <td> (e.g. a row header followed by values) keeps all of its cells,
        # in document order.
        cells = row.find_elements(By.CSS_SELECTOR, "th, td")
        # Read .text once per cell: every access is a WebDriver round-trip.
        texts = [cell.text.strip() for cell in cells]
        # Empty cells are kept as "" so columns stay aligned across rows;
        # only rows with no text at all are dropped.
        if any(texts):
            rows.append(texts)
    return rows


def scrape_text(driver, selector: str = "p", extract_table: bool = False) -> str:
    """
    Scrape text or table data from elements matching a CSS selector on the current page.

    Args:
        driver: Selenium WebDriver instance (anything exposing ``find_elements``).
        selector (str): CSS selector to target elements (default: 'p' for paragraphs).
        extract_table (bool): If True, treat every matched element as a table and
            return its rows as JSON (default: False).

    Returns:
        str: Always a string, one of:
            * text mode: the stripped, non-empty texts of the matched elements
              joined with newlines, or "No text found for selector";
            * table mode: a JSON array with one entry per non-empty table, each
              a list of rows, each row a list of cell strings (empty cells are
              kept as ""), or "No tables found for selector" /
              "No table data found";
            * on any error: "Failed to scrape with selector <selector>: <error>".
    """
    # Deliberately broad: this function is a tool boundary, so failures are
    # reported back to the caller as a message instead of being raised.
    try:
        elements = driver.find_elements(By.CSS_SELECTOR, selector)

        if not extract_table:
            stripped = (element.text.strip() for element in elements)
            text_list = [text for text in stripped if text]
            return "\n".join(text_list) if text_list else "No text found for selector"

        if not elements:
            return "No tables found for selector"

        table_data = [rows for rows in map(_table_rows, elements) if rows]
        return json.dumps(table_data) if table_data else "No table data found"
    except Exception as e:
        return f"Failed to scrape with selector {selector}: {e}"

# Register the tool: build a module-level `tool` object at import time that
# points at scrape_text and describes its `selector` / `extract_table` inputs.
# NOTE(review): scrape_text also takes a required `driver` argument that is not
# listed in `inputs` — confirm how the caller supplies the WebDriver.
# NOTE(review): verify that the installed smolagents `Tool` accepts these
# constructor keywords and the "str"/"bool" type names — TODO confirm.
tool = Tool(
    name="scrape_text",
    description="Scrapes text or table data from elements matching a CSS selector on the current page.",
    inputs={
        "selector": {"type": "str", "default": "p", "description": "CSS selector to target elements"},
        "extract_table": {"type": "bool", "default": False, "description": "If True, extract table data as JSON"}
    },
    output_type="str",
    function=scrape_text
)