Spaces:

Firoj112
/

WebAgents_

Running

App Files Files Community

WebAgents_ / tools /scrape_text.py

Firoj112

Create scrape_text.py

e9ed5be verified 11 days ago

raw

history blame

2.21 kB

	from smolagents.tools import Tool
	from helium import S
	from selenium.webdriver.common.by import By
	import json

	def scrape_text(driver, selector="p", extract_table=False):
	    """Scrape text or table data from elements matching a CSS selector.

	    Args:
	        driver: Selenium WebDriver instance; only its
	            ``find_elements(by, value)`` method is used.
	        selector (str): CSS selector to target elements (default: 'p' for
	            paragraphs).
	        extract_table (bool): If True, treat every matched element as a
	            table and return its rows as JSON (default: False).

	    Returns:
	        str: Always a string.
	            * Text mode: the stripped, non-empty element texts joined by
	              newlines, or "No text found for selector".
	            * Table mode: a JSON array with one entry per table; each table
	              is a list of rows and each row a list of cell strings.
	              Empty cells are kept as "" so columns stay aligned; rows
	              whose cells are all empty are dropped. If nothing matches,
	              "No tables found for selector" / "No table data found".
	            * On any exception: "Failed to scrape with selector ...".
	    """
	    try:
	        matches = driver.find_elements(By.CSS_SELECTOR, selector)

	        if not extract_table:
	            # Evaluate `.text` once per element instead of twice.
	            stripped = (element.text.strip() for element in matches)
	            text_list = [text for text in stripped if text]
	            return "\n".join(text_list) if text_list else "No text found for selector"

	        if not matches:
	            return "No tables found for selector"

	        table_data = []
	        for table in matches:
	            table_rows = []
	            for row in table.find_elements(By.TAG_NAME, "tr"):
	                # Collect header and data cells together, in document order,
	                # so a row such as <th>label</th><td>value</td> keeps both.
	                cells = row.find_elements(By.CSS_SELECTOR, "th, td")
	                # Keep empty cells: dropping them would shift the remaining
	                # values into the wrong columns.
	                row_data = [cell.text.strip() for cell in cells]
	                if any(row_data):
	                    table_rows.append(row_data)
	            if table_rows:
	                table_data.append(table_rows)
	        return json.dumps(table_data) if table_data else "No table data found"
	    except Exception as e:
	        # Deliberate best-effort boundary: the caller receives a message
	        # string rather than an exception.
	        return f"Failed to scrape with selector {selector}: {str(e)}"

	# Expose scrape_text as a module-level Tool object.
	# The schema lists only `selector` and `extract_table`; the function's
	# `driver` parameter is not declared here.
	# NOTE(review): confirm the installed smolagents version accepts "str"/"bool"
	# as input type names and supports constructing Tool from keyword arguments.
	_SCRAPE_TEXT_INPUTS = {
	    "selector": {
	        "type": "str",
	        "default": "p",
	        "description": "CSS selector to target elements",
	    },
	    "extract_table": {
	        "type": "bool",
	        "default": False,
	        "description": "If True, extract table data as JSON",
	    },
	}

	tool = Tool(
	    name="scrape_text",
	    description="Scrapes text or table data from elements matching a CSS selector on the current page.",
	    inputs=_SCRAPE_TEXT_INPUTS,
	    output_type="str",
	    function=scrape_text,
	)