chipling-api / test.py
Maouu's picture
added a scrape route
f85ff86
raw
history blame
469 Bytes
from curl_cffi import requests as req
from bs4 import BeautifulSoup
import html2text
# Page fetched when this file is run as a script.
url = 'https://www.firecrawl.dev/'

# Tags removed before conversion: their contents are code, styling, or
# vector markup rather than readable page text.
UNWANTED_TAGS = ['script', 'style', 'noscript', 'svg']


def html_to_markdown(html):
    """Return *html* converted to Markdown with unwanted tags removed.

    The document is parsed with BeautifulSoup's ``html.parser``, every tag
    listed in ``UNWANTED_TAGS`` is removed together with its contents, and
    the remaining HTML is passed to ``html2text``.

    Args:
        html: HTML source text.

    Returns:
        The Markdown string produced by ``html2text.html2text``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for tag in soup(UNWANTED_TAGS):
        tag.decompose()
    return html2text.html2text(str(soup))


def scrape_markdown(page_url):
    """Fetch *page_url* and return its cleaned content as Markdown.

    Args:
        page_url: Address of the page to download.

    Returns:
        Markdown text for the downloaded page.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an HTTP error
        status, plus any transport error raised by the request itself.
    """
    response = req.get(page_url)
    # Fail loudly on 4xx/5xx instead of converting an error page to Markdown.
    response.raise_for_status()
    return html_to_markdown(response.text)


if __name__ == '__main__':
    # Guarded so that importing this module does not hit the network.
    print(scrape_markdown(url))