Spaces:

Maouu
/

chipling-api

Running

chipling-api / test.py

added a scrape route

f85ff86 17 days ago

469 Bytes

	from curl_cffi import requests as req
	from bs4 import BeautifulSoup
	import html2text

	# Smoke test for the scrape flow: download one page, drop non-content
	# markup, and print the remainder as Markdown.
	url = 'https://www.firecrawl.dev/'

	# Fetch HTML content
	response = req.get(url)
	# Fail loudly on HTTP errors (4xx/5xx) instead of converting an error
	# page to Markdown as if it were the real content.
	response.raise_for_status()
	soup = BeautifulSoup(response.text, 'html.parser')

	# Remove tags whose contents are not readable page text. decompose()
	# deletes the tag and everything inside it from the parsed tree.
	for tag in soup(['script', 'style', 'noscript', 'svg']):
	    tag.decompose()

	# Serialize the pruned tree back to an HTML string
	clean_html = str(soup)

	# Convert to Markdown
	markdown = html2text.html2text(clean_html)

	# Output
	print(markdown)