Spaces:

Firoj112
/

WebAgents_

Running

App Files Files Community

Firoj112 committed 11 days ago

Commit

5dee8aa

verified ·

1 Parent(s): ec6a434

Update tools/scrape_text.py

Browse files

Files changed (1) hide show

tools/scrape_text.py +37 -54

tools/scrape_text.py CHANGED Viewed

@@ -3,58 +3,41 @@ from helium import S
 from selenium.webdriver.common.by import By
 import json
-def scrape_text(driver, selector="p", extract_table=False):
-    """
-    Scrape text or table data from elements matching a CSS selector on the current page.
-    Args:
-        driver: Selenium WebDriver instance
-        selector (str): CSS selector to target elements (default: 'p' for paragraphs)
-        extract_table (bool): If True, treat each matched element as a table and
-            return its rows as a JSON string (default: False)
-    Returns:
-        str: Newline-joined text of the matched elements, or a JSON-encoded list of
-        tables (each a list of rows, each row a list of cell strings). If nothing is
-        found, or if any exception occurs, a message string is returned instead.
-    """
-    try:
-        if extract_table:
-            tables = driver.find_elements(By.CSS_SELECTOR, selector)
-            if not tables:
-                return "No tables found for selector"
-            table_data = []
-            for table in tables:
-                rows = table.find_elements(By.TAG_NAME, "tr")
-                table_rows = []
-                for row in rows:
-                    # <td> cells win; <th> cells are used only when the row has no <td>.
-                    cells = row.find_elements(By.TAG_NAME, "td") or row.find_elements(By.TAG_NAME, "th")
-                    # Cells whose stripped text is empty are left out of the row.
-                    row_data = [cell.text.strip() for cell in cells if cell.text.strip()]
-                    if row_data:
-                        table_rows.append(row_data)
-                # Tables that produced no non-empty rows are omitted from the result.
-                if table_rows:
-                    table_data.append(table_rows)
-            return json.dumps(table_data) if table_data else "No table data found"
-        else:
-            elements = driver.find_elements(By.CSS_SELECTOR, selector)
-            text_list = [element.text.strip() for element in elements if element.text.strip()]
-            return "\n".join(text_list) if text_list else "No text found for selector"
-    except Exception as e:
-        # Failures are reported to the caller as a message string, not raised.
-        return f"Failed to scrape with selector {selector}: {str(e)}"
-# Register the tool: wrap scrape_text in a Tool object with its name,
-# description, input schema and output type.
-# NOTE(review): assumes Tool accepts name/description/inputs/output_type/function
-# as keyword arguments - confirm against the Tool class imported by this file.
-scrape_text_tool = Tool(
-    name="scrape_text",
-    description="Scrapes text or table data from elements matching a CSS selector on the current page.",
-    inputs={
-        # Mirrors the selector parameter of scrape_text (same default).
-        "selector": {
-            "type": "str",
-            "default": "p",
-            "description": "CSS selector to target elements"
-        },
-        # Mirrors the extract_table parameter of scrape_text (same default).
-        "extract_table": {
-            "type": "bool",
-            "default": False,
-            "description": "If True, extract table data as JSON"
-        }
-    },
-    output_type="str",
-    function=scrape_text
-)

 from selenium.webdriver.common.by import By
 import json
+class ScrapeTextTool(Tool):
+    """Tool that scrapes text or table data from the page held by a WebDriver.
+    The driver given to the constructor is reused for every call to forward().
+    """
+    name = "scrape_text"
+    description = "Scrapes text or table data from elements matching a CSS selector on the current page."
+    # NOTE(review): if Tool is smolagents' Tool, its accepted type names are
+    # "string"/"boolean" rather than "str"/"bool" - confirm and adjust here and
+    # in output_type if so.
+    inputs = {
+        "selector": {"type": "str", "default": "p", "description": "CSS selector to target elements"},
+        "extract_table": {"type": "bool", "default": False, "description": "If True, extract table data as JSON"}
+    }
+    output_type = "str"
+    def __init__(self, driver):
+        """Store the WebDriver used for all scraping calls.
+        Args:
+            driver: Selenium WebDriver instance whose current page is scraped.
+        """
+        # Run the base initializer first so any state Tool sets up is present.
+        super().__init__()
+        self.driver = driver
+    @staticmethod
+    def _non_empty_text(elements):
+        """Return the stripped, non-empty text of each element, reading .text once each."""
+        stripped = (element.text.strip() for element in elements)
+        return [text for text in stripped if text]
+    def forward(self, selector="p", extract_table=False):
+        """Scrape the elements matching selector on the driver's current page.
+        Args:
+            selector: CSS selector to target elements (default: 'p').
+            extract_table: If True, treat each match as a table and return its
+                rows as a JSON string (default: False).
+        Returns:
+            str: Newline-joined element text, or a JSON-encoded list of tables
+            (each a list of rows, each row a list of cell strings). If nothing
+            is found, or if any exception occurs, a message string is returned.
+        """
+        try:
+            matches = self.driver.find_elements(By.CSS_SELECTOR, selector)
+            if not extract_table:
+                text_list = self._non_empty_text(matches)
+                return "\n".join(text_list) if text_list else "No text found for selector"
+            if not matches:
+                return "No tables found for selector"
+            table_data = []
+            for table in matches:
+                table_rows = []
+                for row in table.find_elements(By.TAG_NAME, "tr"):
+                    # Select both cell kinds at once so a <th> row header that
+                    # shares a row with <td> cells is kept, in document order.
+                    row_data = self._non_empty_text(row.find_elements(By.CSS_SELECTOR, "th, td"))
+                    if row_data:
+                        table_rows.append(row_data)
+                # Tables that produced no non-empty rows are omitted.
+                if table_rows:
+                    table_data.append(table_rows)
+            return json.dumps(table_data) if table_data else "No table data found"
+        except Exception as e:
+            # Failures are reported to the caller as a message string, not raised.
+            return f"Failed to scrape with selector {selector}: {str(e)}"