Spaces:

Firoj112
/

WebAgents_

Running

App Files Files Community

WebAgents_ / tools / scrape_text.py

Firoj112

Update tools/scrape_text.py

b26ce75 verified 8 days ago

raw

history blame contribute delete

2.31 kB

	from smolagents.tools import Tool
	from helium import S
	from selenium.webdriver.common.by import By
	import json
	import logging

	# Module-level logger named after this module; ScrapeTextTool logs through it.
	logger = logging.getLogger(__name__)

	class ScrapeTextTool(Tool):
	    """Tool that scrapes text or table data from the current page.

	    Elements are located with a CSS selector through the ``driver`` object
	    given to the constructor (presumably a Selenium WebDriver -- the code
	    only relies on ``find_elements(by, value)`` and each element's ``text``).
	    Every outcome, including failures, is reported as a string result.
	    """

	    name = "scrape_text"
	    description = "Scrapes text or table data from elements matching a CSS selector on the current page."
	    inputs = {
	        "selector": {"type": "string", "default": "p", "nullable": False, "description": "CSS selector to target elements"},
	        "extract_table": {"type": "boolean", "default": False, "nullable": False, "description": "If True, extract table data as JSON"},
	    }
	    output_type = "string"

	    def __init__(self, driver):
	        """Store the driver used for scraping.

	        Args:
	            driver: Object used to query the page. When it is ``None`` the
	                tool is marked as not initialized and ``forward`` returns
	                an error string instead of scraping.
	        """
	        super().__init__()
	        self.driver = driver
	        self.is_initialized = self.driver is not None
	        # Lazy %-style arguments: the message is only formatted if DEBUG is enabled.
	        logger.debug("ScrapeTextTool initialized: is_initialized=%s", self.is_initialized)

	    def forward(self, selector: str = "p", extract_table: bool = False) -> str:
	        """Scrape text, or table contents, from elements matching ``selector``.

	        Args:
	            selector: CSS selector identifying the elements to scrape.
	            extract_table: When true, treat every match as a table and
	                return its rows as JSON; otherwise return the matches'
	                text joined by newlines.

	        Returns:
	            The scraped text or a JSON string of tables, or a
	            human-readable message when nothing was found, the tool is not
	            initialized, or scraping failed.
	        """
	        if not self.is_initialized:
	            return "Error: ScrapeTextTool is not initialized"
	        try:
	            if extract_table:
	                return self._scrape_tables(selector)
	            return self._scrape_text(selector)
	        except Exception as e:
	            # Tool boundary: failures are handed back as the string result
	            # rather than raised, so log the traceback here or it is lost.
	            logger.exception("Failed to scrape with selector %s", selector)
	            return f"Failed to scrape with selector {selector}: {str(e)}"

	    def _scrape_text(self, selector: str) -> str:
	        """Return the non-empty stripped text of all matches, one per line."""
	        elements = self.driver.find_elements(By.CSS_SELECTOR, selector)
	        # Read each element's text once, then drop the blank ones.
	        stripped = (element.text.strip() for element in elements)
	        text_list = [text for text in stripped if text]
	        return "\n".join(text_list) if text_list else "No text found for selector"

	    def _scrape_tables(self, selector: str) -> str:
	        """Return the matched tables as JSON: a list of tables, each a list of rows."""
	        tables = self.driver.find_elements(By.CSS_SELECTOR, selector)
	        if not tables:
	            return "No tables found for selector"
	        table_data = []
	        for table in tables:
	            table_rows = []
	            for row in table.find_elements(By.TAG_NAME, "tr"):
	                # One lookup for both cell kinds, so a row that mixes <th>
	                # and <td> (e.g. a row-header cell) keeps all of its cells.
	                cells = row.find_elements(By.CSS_SELECTOR, "th, td")
	                # Keep empty cells as "" so columns stay aligned across rows.
	                row_data = [cell.text.strip() for cell in cells]
	                # Skip rows with no cells or with only blank cells.
	                if any(row_data):
	                    table_rows.append(row_data)
	            if table_rows:
	                table_data.append(table_rows)
	        return json.dumps(table_data) if table_data else "No table data found"